LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - ScheduleDAGRRList.cpp
Test:         llvm-toolchain.info
Date:         2018-07-13 00:08:38
Legend:       Lines: hit | not hit

                  Hit     Total    Coverage
Lines:           1034      1160      89.1 %
Functions:         84        99      84.8 %
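(Coverage is Hit / Total: 1034 of 1160 instrumented lines ≈ 89.1 %, and 84 of 99 instrumented functions ≈ 84.8 %, were executed by the test.)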

          Line data    Source code
       1             : //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This implements bottom-up and top-down register pressure reduction list
      11             : // schedulers, using standard algorithms.  The basic approach uses a priority
      12             : // queue of available nodes to schedule.  One at a time, nodes are taken from
      13             : // the priority queue (thus in priority order), checked for legality to
      14             : // schedule, and emitted if legal.
      15             : //
      16             : //===----------------------------------------------------------------------===//
      17             : 
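                     : // --- Illustrative aside, not part of this file or of the LLVM API ---
                     : // A minimal, self-contained sketch of the bottom-up list-scheduling loop
                     : // the header above describes. ToyNode, ByPriority, and
                     : // listScheduleBottomUp are hypothetical names; the real scheduler also
                     : // checks legality (e.g. physical-register interference) and can backtrack.
                     : #include <queue>
                     : #include <vector>
                     : 
                     : struct ToyNode {
                     :   unsigned Priority = 0;             // heuristic rank; larger is better
                     :   unsigned NumSuccsLeft = 0;         // successors not yet scheduled
                     :   std::vector<ToyNode *> Preds;
                     : };
                     : 
                     : struct ByPriority {
                     :   bool operator()(const ToyNode *A, const ToyNode *B) const {
                     :     return A->Priority < B->Priority; // max-heap: best node on top
                     :   }
                     : };
                     : 
                     : // Roots are the nodes with no unscheduled successors; emitting a node
                     : // releases its predecessors, mirroring ReleasePred further below.
                     : inline std::vector<ToyNode *>
                     : listScheduleBottomUp(const std::vector<ToyNode *> &Roots) {
                     :   std::priority_queue<ToyNode *, std::vector<ToyNode *>, ByPriority>
                     :       Available;
                     :   for (ToyNode *Root : Roots)
                     :     Available.push(Root);
                     :   std::vector<ToyNode *> Sequence;
                     :   while (!Available.empty()) {
                     :     ToyNode *N = Available.top();    // highest-priority ready node
                     :     Available.pop();
                     :     Sequence.push_back(N);           // "emit" it
                     :     for (ToyNode *Pred : N->Preds)
                     :       if (--Pred->NumSuccsLeft == 0) // all successors now scheduled
                     :         Available.push(Pred);
                     :   }
                     :   return Sequence;                   // instructions in bottom-up order
                     : }
                     : // --- End illustrative aside ---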
      18             : #include "ScheduleDAGSDNodes.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/DenseMap.h"
      21             : #include "llvm/ADT/STLExtras.h"
      22             : #include "llvm/ADT/SmallSet.h"
      23             : #include "llvm/ADT/SmallVector.h"
      24             : #include "llvm/ADT/Statistic.h"
      25             : #include "llvm/CodeGen/ISDOpcodes.h"
      26             : #include "llvm/CodeGen/MachineFunction.h"
      27             : #include "llvm/CodeGen/MachineOperand.h"
      28             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      29             : #include "llvm/CodeGen/ScheduleDAG.h"
      30             : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
      31             : #include "llvm/CodeGen/SchedulerRegistry.h"
      32             : #include "llvm/CodeGen/SelectionDAGISel.h"
      33             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      34             : #include "llvm/CodeGen/TargetInstrInfo.h"
      35             : #include "llvm/CodeGen/TargetLowering.h"
      36             : #include "llvm/CodeGen/TargetOpcodes.h"
      37             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      39             : #include "llvm/Config/llvm-config.h"
      40             : #include "llvm/IR/InlineAsm.h"
      41             : #include "llvm/MC/MCInstrDesc.h"
      42             : #include "llvm/MC/MCRegisterInfo.h"
      43             : #include "llvm/Support/Casting.h"
      44             : #include "llvm/Support/CodeGen.h"
      45             : #include "llvm/Support/CommandLine.h"
      46             : #include "llvm/Support/Compiler.h"
      47             : #include "llvm/Support/Debug.h"
      48             : #include "llvm/Support/ErrorHandling.h"
      49             : #include "llvm/Support/MachineValueType.h"
      50             : #include "llvm/Support/raw_ostream.h"
      51             : #include <algorithm>
      52             : #include <cassert>
      53             : #include <cstdint>
      54             : #include <cstdlib>
      55             : #include <iterator>
      56             : #include <limits>
      57             : #include <memory>
      58             : #include <utility>
      59             : #include <vector>
      60             : 
      61             : using namespace llvm;
      62             : 
      63             : #define DEBUG_TYPE "pre-RA-sched"
      64             : 
      65             : STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
      66             : STATISTIC(NumUnfolds,    "Number of nodes unfolded");
      67             : STATISTIC(NumDups,       "Number of duplicated nodes");
      68             : STATISTIC(NumPRCopies,   "Number of physical register copies");
      69             : 
      70             : static RegisterScheduler
      71       99743 :   burrListDAGScheduler("list-burr",
      72             :                        "Bottom-up register reduction list scheduling",
      73             :                        createBURRListDAGScheduler);
      74             : 
      75             : static RegisterScheduler
      76       99743 :   sourceListDAGScheduler("source",
      77             :                          "Similar to list-burr but schedules in source "
      78             :                          "order when possible",
      79             :                          createSourceListDAGScheduler);
      80             : 
      81             : static RegisterScheduler
      82       99743 :   hybridListDAGScheduler("list-hybrid",
      83             :                          "Bottom-up register pressure aware list scheduling "
      84             :                          "which tries to balance latency and register pressure",
      85             :                          createHybridListDAGScheduler);
      86             : 
      87             : static RegisterScheduler
      88       99743 :   ILPListDAGScheduler("list-ilp",
      89             :                       "Bottom-up register pressure aware list scheduling "
      90             :                       "which tries to balance ILP and register pressure",
      91             :                       createILPListDAGScheduler);
      92             : 
      93       99743 : static cl::opt<bool> DisableSchedCycles(
      94      199486 :   "disable-sched-cycles", cl::Hidden, cl::init(false),
      95      199486 :   cl::desc("Disable cycle-level precision during preRA scheduling"));
      96             : 
      97             : // Temporary sched=list-ilp flags until the heuristics are robust.
      98             : // Some options are also available under sched=list-hybrid.
      99       99743 : static cl::opt<bool> DisableSchedRegPressure(
     100      199486 :   "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
     101      199486 :   cl::desc("Disable regpressure priority in sched=list-ilp"));
     102       99743 : static cl::opt<bool> DisableSchedLiveUses(
     103      199486 :   "disable-sched-live-uses", cl::Hidden, cl::init(true),
     104      199486 :   cl::desc("Disable live use priority in sched=list-ilp"));
     105       99743 : static cl::opt<bool> DisableSchedVRegCycle(
     106      199486 :   "disable-sched-vrcycle", cl::Hidden, cl::init(false),
     107      199486 :   cl::desc("Disable virtual register cycle interference checks"));
     108       99743 : static cl::opt<bool> DisableSchedPhysRegJoin(
     109      199486 :   "disable-sched-physreg-join", cl::Hidden, cl::init(false),
     110      199486 :   cl::desc("Disable physreg def-use affinity"));
     111       99743 : static cl::opt<bool> DisableSchedStalls(
     112      199486 :   "disable-sched-stalls", cl::Hidden, cl::init(true),
     113      199486 :   cl::desc("Disable no-stall priority in sched=list-ilp"));
     114       99743 : static cl::opt<bool> DisableSchedCriticalPath(
     115      199486 :   "disable-sched-critical-path", cl::Hidden, cl::init(false),
     116      199486 :   cl::desc("Disable critical path priority in sched=list-ilp"));
     117       99743 : static cl::opt<bool> DisableSchedHeight(
     118      199486 :   "disable-sched-height", cl::Hidden, cl::init(false),
     119      199486 :   cl::desc("Disable scheduled-height priority in sched=list-ilp"));
     120       99743 : static cl::opt<bool> Disable2AddrHack(
     121      199486 :   "disable-2addr-hack", cl::Hidden, cl::init(true),
     122      199486 :   cl::desc("Disable scheduler's two-address hack"));
     123             : 
     124       99743 : static cl::opt<int> MaxReorderWindow(
     125      199486 :   "max-sched-reorder", cl::Hidden, cl::init(6),
     126       99743 :   cl::desc("Number of instructions to allow ahead of the critical path "
     127       99743 :            "in sched=list-ilp"));
     128             : 
     129       99743 : static cl::opt<unsigned> AvgIPC(
     130      199486 :   "sched-avg-ipc", cl::Hidden, cl::init(1),
      131      199486 :   cl::desc("Average inst/cycle when no target itinerary exists."));
     132             : 
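                     : // For reference, the schedulers registered above and these flags are
                     : // selected on the llc command line, e.g. (illustrative invocation; the
                     : // input file name is hypothetical):
                     : //
                     : //   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 \
                     : //       -disable-sched-reg-pressure foo.ll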
     133             : namespace {
     134             : 
     135             : //===----------------------------------------------------------------------===//
     136             : /// ScheduleDAGRRList - The actual register reduction list scheduler
      137             : /// implementation.  This supports bottom-up scheduling.
     138             : ///
     139             : class ScheduleDAGRRList : public ScheduleDAGSDNodes {
     140             : private:
     141             :   /// NeedLatency - True if the scheduler will make use of latency information.
     142             :   bool NeedLatency;
     143             : 
     144             :   /// AvailableQueue - The priority queue to use for the available SUnits.
     145             :   SchedulingPriorityQueue *AvailableQueue;
     146             : 
     147             :   /// PendingQueue - This contains all of the instructions whose operands have
     148             :   /// been issued, but their results are not ready yet (due to the latency of
      149             :   /// the operation).  Once the operands become available, the instruction is
     150             :   /// added to the AvailableQueue.
     151             :   std::vector<SUnit *> PendingQueue;
     152             : 
     153             :   /// HazardRec - The hazard recognizer to use.
     154             :   ScheduleHazardRecognizer *HazardRec;
     155             : 
     156             :   /// CurCycle - The current scheduler state corresponds to this cycle.
     157             :   unsigned CurCycle = 0;
     158             : 
     159             :   /// MinAvailableCycle - Cycle of the soonest available instruction.
     160             :   unsigned MinAvailableCycle;
     161             : 
      162             :   /// IssueCount - Count of instructions issued in this cycle.
     163             :   /// Currently valid only for bottom-up scheduling.
     164             :   unsigned IssueCount;
     165             : 
      166             :   /// LiveRegDefs - A set of physical registers and their definitions
      167             :   /// that are "live". These defining nodes must be scheduled before any
      168             :   /// other node that modifies the registers can be scheduled.
     169             :   unsigned NumLiveRegs;
     170             :   std::unique_ptr<SUnit*[]> LiveRegDefs;
     171             :   std::unique_ptr<SUnit*[]> LiveRegGens;
     172             : 
     173             :   // Collect interferences between physical register use/defs.
      174             :   // Each interference is an SUnit and a set of physical registers.
     175             :   SmallVector<SUnit*, 4> Interferences;
     176             : 
     177             :   using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
     178             : 
     179             :   LRegsMapT LRegsMap;
     180             : 
     181             :   /// Topo - A topological ordering for SUnits which permits fast IsReachable
     182             :   /// and similar queries.
     183             :   ScheduleDAGTopologicalSort Topo;
     184             : 
     185             :   // Hack to keep track of the inverse of FindCallSeqStart without more crazy
     186             :   // DAG crawling.
     187             :   DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
     188             : 
     189             : public:
     190      363255 :   ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
     191             :                     SchedulingPriorityQueue *availqueue,
     192             :                     CodeGenOpt::Level OptLevel)
     193      363255 :     : ScheduleDAGSDNodes(mf),
     194             :       NeedLatency(needlatency), AvailableQueue(availqueue),
     195     1089765 :       Topo(SUnits, nullptr) {
     196      363255 :     const TargetSubtargetInfo &STI = mf.getSubtarget();
     197      363255 :     if (DisableSchedCycles || !NeedLatency)
     198      668986 :       HazardRec = new ScheduleHazardRecognizer();
     199             :     else
     200       28762 :       HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
     201      363255 :   }
     202             : 
     203     1453020 :   ~ScheduleDAGRRList() override {
     204      363255 :     delete HazardRec;
     205      363255 :     delete AvailableQueue;
     206      726510 :   }
     207             : 
     208             :   void Schedule() override;
     209             : 
     210             :   ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
     211             : 
     212             :   /// IsReachable - Checks if SU is reachable from TargetSU.
     213             :   bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
     214         116 :     return Topo.IsReachable(SU, TargetSU);
     215             :   }
     216             : 
     217             :   /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
     218             :   /// create a cycle.
     219             :   bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
     220       24741 :     return Topo.WillCreateCycle(SU, TargetSU);
     221             :   }
     222             : 
      223             :   /// AddPred - Adds a predecessor edge to SUnit SU.
      224             :   /// It does not report whether the predecessor is new.
      225             :   /// Updates the topological ordering if required.
     226      101753 :   void AddPred(SUnit *SU, const SDep &D) {
     227      203506 :     Topo.AddPred(SU, D.getSUnit());
     228      101753 :     SU->addPred(D);
     229      101753 :   }
     230             : 
      231             :   /// RemovePred - Removes a predecessor edge from SUnit SU.
      232             :   /// It does not report whether an edge was actually removed.
      233             :   /// Updates the topological ordering if required.
     234       48518 :   void RemovePred(SUnit *SU, const SDep &D) {
     235       97036 :     Topo.RemovePred(SU, D.getSUnit());
     236       48518 :     SU->removePred(D);
     237       48518 :   }
     238             : 
     239             : private:
     240             :   bool isReady(SUnit *SU) {
     241     3656649 :     return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
     242           0 :       AvailableQueue->isReady(SU);
     243             :   }
     244             : 
     245             :   void ReleasePred(SUnit *SU, const SDep *PredEdge);
     246             :   void ReleasePredecessors(SUnit *SU);
     247             :   void ReleasePending();
     248             :   void AdvanceToCycle(unsigned NextCycle);
     249             :   void AdvancePastStalls(SUnit *SU);
     250             :   void EmitNode(SUnit *SU);
     251             :   void ScheduleNodeBottomUp(SUnit*);
     252             :   void CapturePred(SDep *PredEdge);
     253             :   void UnscheduleNodeBottomUp(SUnit*);
     254             :   void RestoreHazardCheckerBottomUp();
     255             :   void BacktrackBottomUp(SUnit*, SUnit*);
     256             :   SUnit *TryUnfoldSU(SUnit *);
     257             :   SUnit *CopyAndMoveSuccessors(SUnit*);
     258             :   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
     259             :                                 const TargetRegisterClass*,
     260             :                                 const TargetRegisterClass*,
     261             :                                 SmallVectorImpl<SUnit*>&);
     262             :   bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
     263             : 
     264             :   void releaseInterferences(unsigned Reg = 0);
     265             : 
     266             :   SUnit *PickNodeToScheduleBottomUp();
     267             :   void ListScheduleBottomUp();
     268             : 
     269             :   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
     270             :   /// Updates the topological ordering if required.
     271       13414 :   SUnit *CreateNewSUnit(SDNode *N) {
     272       26828 :     unsigned NumSUnits = SUnits.size();
     273       13414 :     SUnit *NewNode = newSUnit(N);
     274             :     // Update the topological ordering.
     275       13414 :     if (NewNode->NodeNum >= NumSUnits)
     276       13414 :       Topo.InitDAGTopologicalSorting();
     277       13414 :     return NewNode;
     278             :   }
     279             : 
     280             :   /// CreateClone - Creates a new SUnit from an existing one.
     281             :   /// Updates the topological ordering if required.
     282       20441 :   SUnit *CreateClone(SUnit *N) {
     283       40882 :     unsigned NumSUnits = SUnits.size();
     284       20441 :     SUnit *NewNode = Clone(N);
     285             :     // Update the topological ordering.
     286       20441 :     if (NewNode->NodeNum >= NumSUnits)
     287       20441 :       Topo.InitDAGTopologicalSorting();
     288       20441 :     return NewNode;
     289             :   }
     290             : 
     291             :   /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
     292             :   /// need actual latency information but the hybrid scheduler does.
     293     4328456 :   bool forceUnitLatencies() const override {
     294     9162359 :     return !NeedLatency;
     295             :   }
     296             : };
     297             : 
     298             : }  // end anonymous namespace
     299             : 
     300             : /// GetCostForDef - Looks up the register class and cost for a given definition.
     301             : /// Typically this just means looking up the representative register class,
     302             : /// but for untyped values (MVT::Untyped) it means inspecting the node's
     303             : /// opcode to determine what register class is being generated.
     304      531739 : static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
     305             :                           const TargetLowering *TLI,
     306             :                           const TargetInstrInfo *TII,
     307             :                           const TargetRegisterInfo *TRI,
     308             :                           unsigned &RegClass, unsigned &Cost,
     309             :                           const MachineFunction &MF) {
     310             :   MVT VT = RegDefPos.GetValue();
     311             : 
     312             :   // Special handling for untyped values.  These values can only come from
     313             :   // the expansion of custom DAG-to-DAG patterns.
     314      531739 :   if (VT == MVT::Untyped) {
     315        1964 :     const SDNode *Node = RegDefPos.GetNode();
     316             : 
     317             :     // Special handling for CopyFromReg of untyped values.
     318        2100 :     if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
     319         272 :       unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     320         136 :       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
     321         272 :       RegClass = RC->getID();
     322         136 :       Cost = 1;
     323         527 :       return;
     324             :     }
     325             : 
     326             :     unsigned Opcode = Node->getMachineOpcode();
     327        1828 :     if (Opcode == TargetOpcode::REG_SEQUENCE) {
     328         510 :       unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
     329         255 :       const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
     330         510 :       RegClass = RC->getID();
     331         255 :       Cost = 1;
     332         255 :       return;
     333             :     }
     334             : 
     335        1573 :     unsigned Idx = RegDefPos.GetIdx();
     336        3146 :     const MCInstrDesc Desc = TII->get(Opcode);
     337        1573 :     const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
     338        3146 :     RegClass = RC->getID();
     339             :     // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
     340             :     // better way to determine it.
     341        1573 :     Cost = 1;
     342             :   } else {
     343     1059550 :     RegClass = TLI->getRepRegClassFor(VT)->getID();
     344      529775 :     Cost = TLI->getRepRegClassCostFor(VT);
     345             :   }
     346             : }
     347             : 
     348             : /// Schedule - Schedule the DAG using list scheduling.
     349      363255 : void ScheduleDAGRRList::Schedule() {
     350             :   LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
     351             :                     << " '" << BB->getName() << "' **********\n");
     352             : 
     353      363255 :   CurCycle = 0;
     354      363255 :   IssueCount = 0;
     355      363255 :   MinAvailableCycle =
     356      363255 :       DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
     357      363255 :   NumLiveRegs = 0;
     358             :   // Allocate slots for each physical register, plus one for a special register
     359             :   // to track the virtual resource of a calling sequence.
     360      363255 :   LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
     361      363255 :   LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
     362      363255 :   CallSeqEndForStart.clear();
     363             :   assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
     364             : 
     365             :   // Build the scheduling graph.
     366      363255 :   BuildSchedGraph(nullptr);
     367             : 
     368             :   LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
     369      363255 :   Topo.InitDAGTopologicalSorting();
     370             : 
     371      363255 :   AvailableQueue->initNodes(SUnits);
     372             : 
     373      363255 :   HazardRec->Reset();
     374             : 
     375             :   // Execute the actual scheduling loop.
     376      363255 :   ListScheduleBottomUp();
     377             : 
     378      363255 :   AvailableQueue->releaseState();
     379             : 
     380             :   LLVM_DEBUG({
     381             :     dbgs() << "*** Final schedule ***\n";
     382             :     dumpSchedule();
     383             :     dbgs() << '\n';
     384             :   });
     385      363255 : }
     386             : 
     387             : //===----------------------------------------------------------------------===//
     388             : //  Bottom-Up Scheduling
     389             : //===----------------------------------------------------------------------===//
     390             : 
     391             : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
     392             : /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
     393     4833903 : void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
     394     4833903 :   SUnit *PredSU = PredEdge->getSUnit();
     395             : 
     396             : #ifndef NDEBUG
     397             :   if (PredSU->NumSuccsLeft == 0) {
     398             :     dbgs() << "*** Scheduling failed! ***\n";
     399             :     PredSU->dump(this);
     400             :     dbgs() << " has been released too many times!\n";
     401             :     llvm_unreachable(nullptr);
     402             :   }
     403             : #endif
     404     4833903 :   --PredSU->NumSuccsLeft;
     405             : 
     406     4833903 :   if (!forceUnitLatencies()) {
     407             :     // Updating predecessor's height. This is now the cycle when the
     408             :     // predecessor can be scheduled without causing a pipeline stall.
     409      243305 :     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
     410             :   }
     411             : 
     412             :   // If all the node's successors are scheduled, this node is ready
     413             :   // to be scheduled. Ignore the special EntrySU node.
     414     4833903 :   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
     415     3656649 :     PredSU->isAvailable = true;
     416             : 
     417             :     unsigned Height = PredSU->getHeight();
     418     3656649 :     if (Height < MinAvailableCycle)
     419     1584909 :       MinAvailableCycle = Height;
     420             : 
     421     3656649 :     if (isReady(PredSU)) {
     422     3656649 :       AvailableQueue->push(PredSU);
     423             :     }
      424             :     // CapturePred and others may have left the node in the pending queue; avoid
     425             :     // adding it twice.
     426           0 :     else if (!PredSU->isPending) {
     427           0 :       PredSU->isPending = true;
     428           0 :       PendingQueue.push_back(PredSU);
     429             :     }
     430             :   }
     431     4833903 : }
     432             : 
     433             : /// IsChainDependent - Test if Outer is reachable from Inner through
     434             : /// chain dependencies.
     435        2208 : static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
     436             :                              unsigned NestLevel,
     437             :                              const TargetInstrInfo *TII) {
     438             :   SDNode *N = Outer;
     439             :   while (true) {
     440        4484 :     if (N == Inner)
     441             :       return true;
     442             :     // For a TokenFactor, examine each operand. There may be multiple ways
     443             :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     444             :     // most nesting in order to ensure that we find the corresponding match.
     445        8966 :     if (N->getOpcode() == ISD::TokenFactor) {
     446        1739 :       for (const SDValue &Op : N->op_values())
     447        1318 :         if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
     448             :           return true;
     449             :       return false;
     450             :     }
     451             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     452        4062 :     if (N->isMachineOpcode()) {
     453        3617 :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     454           0 :         ++NestLevel;
     455        3617 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     456        1786 :         if (NestLevel == 0)
     457             :           return false;
     458           0 :         --NestLevel;
     459             :       }
     460             :     }
     461             :     // Otherwise, find the chain and continue climbing.
     462       11469 :     for (const SDValue &Op : N->op_values())
     463       11469 :       if (Op.getValueType() == MVT::Other) {
     464        2276 :         N = Op.getNode();
     465             :         goto found_chain_operand;
     466             :       }
     467             :     return false;
     468             :   found_chain_operand:;
     469        2276 :     if (N->getOpcode() == ISD::EntryToken)
     470             :       return false;
     471             :   }
     472             : }
     473             : 
     474             : /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
     475             : /// the corresponding (lowered) CALLSEQ_BEGIN node.
     476             : ///
      477             : /// NestLevel and MaxNest are used in recursion to indicate the current level
     478             : /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
     479             : /// level seen so far.
     480             : ///
     481             : /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
     482             : /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
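                     : ///
                     : /// For example, given the (schematic) nested chain
                     : ///   CALLSEQ_BEGIN_a .. CALLSEQ_BEGIN_b .. CALLSEQ_END_b .. CALLSEQ_END_a
                     : /// a walk upward from CALLSEQ_END_a increments NestLevel at each
                     : /// CALLSEQ_END and decrements it at each CALLSEQ_BEGIN, stopping at the
                     : /// CALLSEQ_BEGIN that brings NestLevel back to zero (here
                     : /// CALLSEQ_BEGIN_a), thereby skipping the inner pair.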
     483             : static SDNode *
     484      226954 : FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
     485             :                  const TargetInstrInfo *TII) {
     486             :   while (true) {
     487             :     // For a TokenFactor, examine each operand. There may be multiple ways
     488             :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     489             :     // most nesting in order to ensure that we find the corresponding match.
     490     1938946 :     if (N->getOpcode() == ISD::TokenFactor) {
     491             :       SDNode *Best = nullptr;
     492       14403 :       unsigned BestMaxNest = MaxNest;
     493       67249 :       for (const SDValue &Op : N->op_values()) {
     494       52846 :         unsigned MyNestLevel = NestLevel;
     495       52846 :         unsigned MyMaxNest = MaxNest;
     496      105692 :         if (SDNode *New = FindCallSeqStart(Op.getNode(),
     497       52846 :                                            MyNestLevel, MyMaxNest, TII))
     498       52809 :           if (!Best || (MyMaxNest > BestMaxNest)) {
     499             :             Best = New;
     500       14403 :             BestMaxNest = MyMaxNest;
     501             :           }
     502             :       }
     503             :       assert(Best);
     504       14403 :       MaxNest = BestMaxNest;
     505       14403 :       return Best;
     506             :     }
     507             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     508      955070 :     if (N->isMachineOpcode()) {
     509      640193 :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     510      174109 :         ++NestLevel;
     511      174109 :         MaxNest = std::max(MaxNest, NestLevel);
     512      466084 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     513             :         assert(NestLevel != 0);
     514      212515 :         --NestLevel;
     515      212515 :         if (NestLevel == 0)
     516             :           return N;
     517             :       }
     518             :     }
     519             :     // Otherwise, find the chain and continue climbing.
     520     2250097 :     for (const SDValue &Op : N->op_values())
     521     2250097 :       if (Op.getValueType() == MVT::Other) {
     522      742556 :         N = Op.getNode();
     523             :         goto found_chain_operand;
     524             :       }
     525             :     return nullptr;
     526             :   found_chain_operand:;
     527      742556 :     if (N->getOpcode() == ISD::EntryToken)
     528             :       return nullptr;
     529             :   }
     530             : }
     531             : 
     532             : /// Call ReleasePred for each predecessor, then update register live def/gen.
     533             : /// Always update LiveRegDefs for a register dependence even if the current SU
      534             : /// also defines the register. This effectively creates one large live range
      535             : /// across a sequence of two-address nodes. This is important because the
     536             : /// entire chain must be scheduled together. Example:
     537             : ///
     538             : /// flags = (3) add
     539             : /// flags = (2) addc flags
     540             : /// flags = (1) addc flags
     541             : ///
     542             : /// results in
     543             : ///
     544             : /// LiveRegDefs[flags] = 3
     545             : /// LiveRegGens[flags] = 1
     546             : ///
     547             : /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
     548             : /// interference on flags.
     549     4385027 : void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
     550             :   // Bottom up: release predecessors
     551    14052833 :   for (SDep &Pred : SU->Preds) {
     552     4833903 :     ReleasePred(SU, &Pred);
     553             :     if (Pred.isAssignedRegDep()) {
     554             :       // This is a physical register dependency and it's impossible or
     555             :       // expensive to copy the register. Make sure nothing that can
     556             :       // clobber the register is scheduled between the predecessor and
     557             :       // this node.
     558      140224 :       SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
     559             :       assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
     560             :              "interference on register dependence");
     561      140224 :       LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
     562      140224 :       if (!LiveRegGens[Pred.getReg()]) {
     563      136084 :         ++NumLiveRegs;
     564      136084 :         LiveRegGens[Pred.getReg()] = SU;
     565             :       }
     566             :     }
     567             :   }
     568             : 
     569             :   // If we're scheduling a lowered CALLSEQ_END, find the corresponding
     570             :   // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
     571             :   // these nodes, to prevent other calls from being interscheduled with them.
     572     4385027 :   unsigned CallResource = TRI->getNumRegs();
     573     8770054 :   if (!LiveRegDefs[CallResource])
     574     3810412 :     for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
     575     6361878 :       if (Node->isMachineOpcode() &&
     576     2533013 :           Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     577      174108 :         unsigned NestLevel = 0;
     578      174108 :         unsigned MaxNest = 0;
     579      174108 :         SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
     580             :         assert(N && "Must find call sequence start");
     581             : 
     582      348216 :         SUnit *Def = &SUnits[N->getNodeId()];
     583      348216 :         CallSeqEndForStart[Def] = SU;
     584             : 
     585      174108 :         ++NumLiveRegs;
     586      174108 :         LiveRegDefs[CallResource] = Def;
     587      174108 :         LiveRegGens[CallResource] = SU;
     588             :         break;
     589             :       }
     590     4385027 : }
     591             : 
     592             : /// Check to see if any of the pending instructions are ready to issue.  If
     593             : /// so, add them to the available queue.
     594     3993186 : void ScheduleDAGRRList::ReleasePending() {
     595     3993186 :   if (DisableSchedCycles) {
     596             :     assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
     597             :     return;
     598             :   }
     599             : 
     600             :   // If the available queue is empty, it is safe to reset MinAvailableCycle.
     601     3993186 :   if (AvailableQueue->empty())
     602     1919699 :     MinAvailableCycle = std::numeric_limits<unsigned>::max();
     603             : 
     604             :   // Check to see if any of the pending instructions are ready to issue.  If
     605             :   // so, add them to the available queue.
     606     7986372 :   for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
     607           0 :     unsigned ReadyCycle = PendingQueue[i]->getHeight();
     608           0 :     if (ReadyCycle < MinAvailableCycle)
     609           0 :       MinAvailableCycle = ReadyCycle;
     610             : 
     611           0 :     if (PendingQueue[i]->isAvailable) {
     612           0 :       if (!isReady(PendingQueue[i]))
     613           0 :           continue;
     614           0 :       AvailableQueue->push(PendingQueue[i]);
     615             :     }
     616           0 :     PendingQueue[i]->isPending = false;
     617           0 :     PendingQueue[i] = PendingQueue.back();
     618             :     PendingQueue.pop_back();
     619           0 :     --i; --e;
     620             :   }
     621             : }
     622             : 
      623             : /// Move the scheduler state forward to the specified cycle.
     624    11816948 : void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
     625    11816948 :   if (NextCycle <= CurCycle)
     626             :     return;
     627             : 
     628     3992327 :   IssueCount = 0;
     629     3992327 :   AvailableQueue->setCurCycle(NextCycle);
     630     3992327 :   if (!HazardRec->isEnabled()) {
     631             :     // Bypass lots of virtual calls in case of long latency.
     632     3943374 :     CurCycle = NextCycle;
     633             :   }
     634             :   else {
     635      278921 :     for (; CurCycle != NextCycle; ++CurCycle) {
     636      114984 :       HazardRec->RecedeCycle();
     637             :     }
     638             :   }
     639             :   // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
     640             :   // available Q to release pending nodes at least once before popping.
     641     3992327 :   ReleasePending();
     642             : }
     643             : 
     644             : /// Move the scheduler state forward until the specified node's dependents are
     645             : /// ready and can be scheduled with no resource conflicts.
     646     4021772 : void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
     647     4021772 :   if (DisableSchedCycles)
     648             :     return;
     649             : 
     650             :   // FIXME: Nodes such as CopyFromReg probably should not advance the current
     651             :   // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
     652             :   // has predecessors the cycle will be advanced when they are scheduled.
      653             :   // But given the crude nature of modeling latency through such nodes, we
     654             :   // currently need to treat these nodes like real instructions.
     655             :   // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
     656             : 
     657             :   unsigned ReadyCycle = SU->getHeight();
     658             : 
     659             :   // Bump CurCycle to account for latency. We assume the latency of other
     660             :   // available instructions may be hidden by the stall (not a full pipe stall).
     661             :   // This updates the hazard recognizer's cycle before reserving resources for
     662             :   // this instruction.
     663     4021772 :   AdvanceToCycle(ReadyCycle);
     664             : 
     665             :   // Calls are scheduled in their preceding cycle, so don't conflict with
     666             :   // hazards from instructions after the call. EmitNode will reset the
     667             :   // scoreboard state before emitting the call.
     668     4021772 :   if (SU->isCall)
     669             :     return;
     670             : 
     671             :   // FIXME: For resource conflicts in very long non-pipelined stages, we
     672             :   // should probably skip ahead here to avoid useless scoreboard checks.
     673             :   int Stalls = 0;
     674             :   while (true) {
     675             :     ScheduleHazardRecognizer::HazardType HT =
     676     3857400 :       HazardRec->getHazardType(SU, -Stalls);
     677             : 
     678     3857400 :     if (HT == ScheduleHazardRecognizer::NoHazard)
     679             :       break;
     680             : 
     681        9439 :     ++Stalls;
     682        9439 :   }
     683     3847961 :   AdvanceToCycle(CurCycle + Stalls);
     684             : }
     685             : 
     686             : /// Record this SUnit in the HazardRecognizer.
     687             : /// Does not update CurCycle.
     688     4021784 : void ScheduleDAGRRList::EmitNode(SUnit *SU) {
     689     4021784 :   if (!HazardRec->isEnabled())
     690             :     return;
     691             : 
     692             :   // Check for phys reg copy.
     693       79351 :   if (!SU->getNode())
     694             :     return;
     695             : 
     696      158698 :   switch (SU->getNode()->getOpcode()) {
     697             :   default:
     698             :     assert(SU->getNode()->isMachineOpcode() &&
     699             :            "This target-independent node should not be scheduled.");
     700             :     break;
     701             :   case ISD::MERGE_VALUES:
     702             :   case ISD::TokenFactor:
     703             :   case ISD::LIFETIME_START:
     704             :   case ISD::LIFETIME_END:
     705             :   case ISD::CopyToReg:
     706             :   case ISD::CopyFromReg:
     707             :   case ISD::EH_LABEL:
     708             :     // Noops don't affect the scoreboard state. Copies are likely to be
     709             :     // removed.
     710             :     return;
     711         165 :   case ISD::INLINEASM:
     712             :     // For inline asm, clear the pipeline state.
     713         165 :     HazardRec->Reset();
     714             :     return;
     715             :   }
     716       56331 :   if (SU->isCall) {
     717             :     // Calls are scheduled with their preceding instructions. For bottom-up
     718             :     // scheduling, clear the pipeline state before emitting.
     719        1282 :     HazardRec->Reset();
     720             :   }
     721             : 
     722       56331 :   HazardRec->EmitInstruction(SU);
     723             : }
     724             : 
     725             : static void resetVRegCycle(SUnit *SU);
     726             : 
     727             : /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
     728             : /// count of its predecessors. If a predecessor pending count is zero, add it to
     729             : /// the Available queue.
     730     4021772 : void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
     731             :   LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
     732             :   LLVM_DEBUG(SU->dump(this));
     733             : 
     734             : #ifndef NDEBUG
     735             :   if (CurCycle < SU->getHeight())
     736             :     LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
     737             :                       << "] pipeline stall!\n");
     738             : #endif
     739             : 
     740             :   // FIXME: Do not modify node height. It may interfere with
     741             :   // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
     742             :   // node its ready cycle can aid heuristics, and after scheduling it can
     743             :   // indicate the scheduled cycle.
     744     4021772 :   SU->setHeightToAtLeast(CurCycle);
     745             : 
     746             :   // Reserve resources for the scheduled instruction.
     747     4021772 :   EmitNode(SU);
     748             : 
     749     4021772 :   Sequence.push_back(SU);
     750             : 
     751     4021772 :   AvailableQueue->scheduledNode(SU);
     752             : 
     753             :   // If HazardRec is disabled, and each inst counts as one cycle, then
     754             :   // advance CurCycle before ReleasePredecessors to avoid useless pushes to
     755             :   // PendingQueue for schedulers that implement HasReadyFilter.
     756     7964205 :   if (!HazardRec->isEnabled() && AvgIPC < 2)
     757     3942433 :     AdvanceToCycle(CurCycle + 1);
     758             : 
     759             :   // Update liveness of predecessors before successors to avoid treating a
     760             :   // two-address node as a live range def.
     761     4021772 :   ReleasePredecessors(SU);
     762             : 
     763             :   // Release all the implicit physical register defs that are live.
     764    17810708 :   for (SDep &Succ : SU->Succs) {
      765             :     // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
     766      278268 :     if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
     767             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     768      135225 :       --NumLiveRegs;
     769      135225 :       LiveRegDefs[Succ.getReg()] = nullptr;
     770      135225 :       LiveRegGens[Succ.getReg()] = nullptr;
     771      135225 :       releaseInterferences(Succ.getReg());
     772             :     }
     773             :   }
     774             :   // Release the special call resource dependence, if this is the beginning
     775             :   // of a call.
     776     4021772 :   unsigned CallResource = TRI->getNumRegs();
     777     8043544 :   if (LiveRegDefs[CallResource] == SU)
     778      174109 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     779             :          SUNode = SUNode->getGluedNode()) {
     780      353202 :       if (SUNode->isMachineOpcode() &&
     781      175785 :           SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     782             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     783      174109 :         --NumLiveRegs;
     784      174109 :         LiveRegDefs[CallResource] = nullptr;
     785      174109 :         LiveRegGens[CallResource] = nullptr;
     786      174109 :         releaseInterferences(CallResource);
     787             :       }
     788             :     }
     789             : 
     790     4021772 :   resetVRegCycle(SU);
     791             : 
     792     4021772 :   SU->isScheduled = true;
     793             : 
     794             :   // Conditions under which the scheduler should eagerly advance the cycle:
     795             :   // (1) No available instructions
     796             :   // (2) All pipelines full, so available instructions must have hazards.
     797             :   //
     798             :   // If HazardRec is disabled, the cycle was pre-advanced before calling
     799             :   // ReleasePredecessors. In that case, IssueCount should remain 0.
     800             :   //
     801             :   // Check AvailableQueue after ReleasePredecessors in case of zero latency.
     802     7964205 :   if (HazardRec->isEnabled() || AvgIPC > 1) {
     803       79339 :     if (SU->getNode() && SU->getNode()->isMachineOpcode())
     804       56322 :       ++IssueCount;
     805      158678 :     if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
     806      153896 :         || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
     807        4782 :       AdvanceToCycle(CurCycle + 1);
     808             :   }
     809     4021772 : }
     810             : 
     811             : /// CapturePred - This does the opposite of ReleasePred. Since SU is being
      812             : /// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
     813             : /// them from AvailableQueue if necessary.
     814       12639 : void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
     815             :   SUnit *PredSU = PredEdge->getSUnit();
     816       12639 :   if (PredSU->isAvailable) {
     817        2895 :     PredSU->isAvailable = false;
     818        2895 :     if (!PredSU->isPending)
     819        1772 :       AvailableQueue->remove(PredSU);
     820             :   }
     821             : 
     822             :   assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
     823             :          "NumSuccsLeft will overflow!");
     824       12639 :   ++PredSU->NumSuccsLeft;
     825       12639 : }
     826             : 
      827             : /// UnscheduleNodeBottomUp - Remove the node from the schedule and update
      828             : /// its own and its predecessors' states to reflect the change.
     829        8504 : void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
     830             :   LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
     831             :   LLVM_DEBUG(SU->dump(this));
     832             : 
     833       42286 :   for (SDep &Pred : SU->Preds) {
     834       12639 :     CapturePred(&Pred);
     835        3640 :     if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
     836             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     837             :       assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
     838             :              "Physical register dependency violated?");
     839         859 :       --NumLiveRegs;
     840         859 :       LiveRegDefs[Pred.getReg()] = nullptr;
     841         859 :       LiveRegGens[Pred.getReg()] = nullptr;
     842         859 :       releaseInterferences(Pred.getReg());
     843             :     }
     844             :   }
     845             : 
     846             :   // Reclaim the special call resource dependence, if this is the beginning
     847             :   // of a call.
     848        8504 :   unsigned CallResource = TRI->getNumRegs();
     849        8504 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     850             :        SUNode = SUNode->getGluedNode()) {
     851       18641 :     if (SUNode->isMachineOpcode() &&
     852        8377 :         SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     853           2 :       SUnit *SeqEnd = CallSeqEndForStart[SU];
     854             :       assert(SeqEnd && "Call sequence start/end must be known");
     855             :       assert(!LiveRegDefs[CallResource]);
     856             :       assert(!LiveRegGens[CallResource]);
     857           1 :       ++NumLiveRegs;
     858           2 :       LiveRegDefs[CallResource] = SU;
     859           1 :       LiveRegGens[CallResource] = SeqEnd;
     860             :     }
     861             :   }
     862             : 
     863             :   // Release the special call resource dependence, if this is the end
     864             :   // of a call.
     865       17008 :   if (LiveRegGens[CallResource] == SU)
     866           0 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     867             :          SUNode = SUNode->getGluedNode()) {
     868           0 :       if (SUNode->isMachineOpcode() &&
     869           0 :           SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     870             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     871             :         assert(LiveRegDefs[CallResource]);
     872             :         assert(LiveRegGens[CallResource]);
     873           0 :         --NumLiveRegs;
     874           0 :         LiveRegDefs[CallResource] = nullptr;
     875           0 :         LiveRegGens[CallResource] = nullptr;
     876           0 :         releaseInterferences(CallResource);
     877             :       }
     878             :     }
     879             : 
     880      141078 :   for (auto &Succ : SU->Succs) {
     881             :     if (Succ.isAssignedRegDep()) {
     882             :       auto Reg = Succ.getReg();
     883        1108 :       if (!LiveRegDefs[Reg])
     884           0 :         ++NumLiveRegs;
     885             :       // This becomes the nearest def. Note that an earlier def may still be
     886             :       // pending if this is a two-address node.
     887         554 :       LiveRegDefs[Reg] = SU;
     888             : 
      889             :       // Update LiveRegGens only if it was empty before this unscheduling.
      890             :       // This avoids incorrectly overwriting an entry set in a previous run.
     891         554 :       if (!LiveRegGens[Reg]) {
     892             :         // Find the successor with the lowest height.
     893           0 :         LiveRegGens[Reg] = Succ.getSUnit();
     894           0 :         for (auto &Succ2 : SU->Succs) {
     895           0 :           if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
     896           0 :               Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
     897           0 :             LiveRegGens[Reg] = Succ2.getSUnit();
     898             :         }
     899             :       }
     900             :     }
     901             :   }
     902       17008 :   if (SU->getHeight() < MinAvailableCycle)
     903       15892 :     MinAvailableCycle = SU->getHeight();
     904             : 
     905        8504 :   SU->setHeightDirty();
     906        8504 :   SU->isScheduled = false;
     907        8504 :   SU->isAvailable = true;
     908        8504 :   if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
     909             :     // Don't make available until backtracking is complete.
     910           0 :     SU->isPending = true;
     911           0 :     PendingQueue.push_back(SU);
     912             :   }
     913             :   else {
     914        8504 :     AvailableQueue->push(SU);
     915             :   }
     916        8504 :   AvailableQueue->unscheduledNode(SU);
     917        8504 : }
     918             : 
     919             : /// After backtracking, the hazard checker needs to be restored to a state
      920             : /// corresponding to the current cycle.
     921         859 : void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
     922         859 :   HazardRec->Reset();
     923             : 
     924        2577 :   unsigned LookAhead = std::min((unsigned)Sequence.size(),
     925        2577 :                                 HazardRec->getMaxLookAhead());
     926         859 :   if (LookAhead == 0)
     927             :     return;
     928             : 
     929             :   std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
     930           1 :   unsigned HazardCycle = (*I)->getHeight();
     931          13 :   for (auto E = Sequence.end(); I != E; ++I) {
     932          12 :     SUnit *SU = *I;
     933          30 :     for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
     934           9 :       HazardRec->RecedeCycle();
     935             :     }
     936          12 :     EmitNode(SU);
     937             :   }
     938             : }
     939             : 
      940             : /// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
      941             : /// scheduled, unscheduling nodes until SU can be scheduled.
     942         859 : void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
     943         859 :   SUnit *OldSU = Sequence.back();
     944             :   while (true) {
     945             :     Sequence.pop_back();
     946             :     // FIXME: use ready cycle instead of height
     947        8504 :     CurCycle = OldSU->getHeight();
     948        8504 :     UnscheduleNodeBottomUp(OldSU);
     949        8504 :     AvailableQueue->setCurCycle(CurCycle);
     950        8504 :     if (OldSU == BtSU)
     951             :       break;
     952        7645 :     OldSU = Sequence.back();
     953             :   }
     954             : 
     955             :   assert(!SU->isSucc(OldSU) && "Something is wrong!");
     956             : 
     957         859 :   RestoreHazardCheckerBottomUp();
     958             : 
     959         859 :   ReleasePending();
     960             : 
     961             :   ++NumBacktracks;
     962         859 : }
     963             : 
     964        2456 : static bool isOperandOf(const SUnit *SU, SDNode *N) {
     965        2456 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     966             :        SUNode = SUNode->getGluedNode()) {
     967        2489 :     if (SUNode->isOperandOf(N))
     968             :       return true;
     969             :   }
     970             :   return false;
     971             : }
     972             : 
      973             : /// TryUnfoldSU - Attempt to unfold SU's node. Returns the new SUnit on
                      : /// success, SU itself if the folded load has already been scheduled, or
                      : /// nullptr if the node cannot be unfolded.
     974        6604 : SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     975        6604 :   SDNode *N = SU->getNode();
     976             :   // Use while over if to ease fall through.
     977             :   SmallVector<SDNode *, 2> NewNodes;
     978        6604 :   if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
     979             :     return nullptr;
     980             : 
      981             :   // Unfolding an x86 DEC64m operation results in a store, a dec, and a
      982             :   // load, which can't be handled here, so quit.
     983        6604 :   if (NewNodes.size() == 3)
     984             :     return nullptr;
     985             : 
     986             :   assert(NewNodes.size() == 2 && "Expected a load folding node!");
     987             : 
     988        6589 :   N = NewNodes[1];
     989        6589 :   SDNode *LoadNode = NewNodes[0];
     990        6589 :   unsigned NumVals = N->getNumValues();
     991        6589 :   unsigned OldNumVals = SU->getNode()->getNumValues();
     992             : 
     993             :   // LoadNode may already exist. This can happen when there is another
      994             :   // load from the same location that produces the same type of value,
      995             :   // but with different alignment or volatility.
     996             :   bool isNewLoad = true;
     997             :   SUnit *LoadSU;
     998        6589 :   if (LoadNode->getNodeId() != -1) {
     999           1 :     LoadSU = &SUnits[LoadNode->getNodeId()];
     1000             :     // If LoadSU has already been scheduled, we would have to clone it, and
     1001             :     // that would negate the benefit of unfolding, so just return SU.
    1002           1 :     if (LoadSU->isScheduled)
    1003             :       return SU;
    1004             :     isNewLoad = false;
    1005             :   } else {
    1006        6588 :     LoadSU = CreateNewSUnit(LoadNode);
    1007        6588 :     LoadNode->setNodeId(LoadSU->NodeNum);
    1008             : 
    1009        6588 :     InitNumRegDefsLeft(LoadSU);
    1010        6588 :     computeLatency(LoadSU);
    1011             :   }
    1012             : 
    1013             :   LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    1014             : 
     1015             :   // Now that we are committed to unfolding, replace the DAG uses.
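                      :   // The first NumVals results of the old node are redirected to the new
                      :   // node N; the old node's final (chain) result is redirected to the
                      :   // unfolded load's chain output (value 1 of LoadNode).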
    1016       20962 :   for (unsigned i = 0; i != NumVals; ++i)
    1017       21561 :     DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    1018       19764 :   DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
    1019             :                                  SDValue(LoadNode, 1));
    1020             : 
    1021        6588 :   SUnit *NewSU = CreateNewSUnit(N);
    1022             :   assert(N->getNodeId() == -1 && "Node already inserted!");
    1023        6588 :   N->setNodeId(NewSU->NodeNum);
    1024             : 
    1025       13176 :   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
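                      :   // Propagate instruction properties: a TIED_TO operand constraint makes
                      :   // the new node two-address, and commutability carries over from MCID.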
    1026       50907 :   for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
    1027             :     if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
    1028         599 :       NewSU->isTwoAddress = true;
    1029         599 :       break;
    1030             :     }
    1031             :   }
    1032       13176 :   if (MCID.isCommutable())
    1033        6012 :     NewSU->isCommutable = true;
    1034             : 
    1035        6588 :   InitNumRegDefsLeft(NewSU);
    1036        6588 :   computeLatency(NewSU);
    1037             : 
    1038             :   // Record all the edges to and from the old SU, by category.
    1039             :   SmallVector<SDep, 4> ChainPreds;
    1040             :   SmallVector<SDep, 4> ChainSuccs;
    1041             :   SmallVector<SDep, 4> LoadPreds;
    1042             :   SmallVector<SDep, 4> NodePreds;
    1043             :   SmallVector<SDep, 4> NodeSuccs;
    1044       21758 :   for (SDep &Pred : SU->Preds) {
    1045        7585 :     if (Pred.isCtrl())
    1046        5129 :       ChainPreds.push_back(Pred);
    1047        2456 :     else if (isOperandOf(Pred.getSUnit(), LoadNode))
    1048        1833 :       LoadPreds.push_back(Pred);
    1049             :     else
    1050         623 :       NodePreds.push_back(Pred);
    1051             :   }
    1052       46450 :   for (SDep &Succ : SU->Succs) {
    1053       19931 :     if (Succ.isCtrl())
    1054        6759 :       ChainSuccs.push_back(Succ);
    1055             :     else
    1056       13172 :       NodeSuccs.push_back(Succ);
    1057             :   }
    1058             : 
    1059             :   // Now assign edges to the newly-created nodes.
    1060       16846 :   for (const SDep &Pred : ChainPreds) {
    1061        5129 :     RemovePred(SU, Pred);
    1062        5129 :     if (isNewLoad)
    1063        5129 :       AddPred(LoadSU, Pred);
    1064             :   }
    1065       10254 :   for (const SDep &Pred : LoadPreds) {
    1066        1833 :     RemovePred(SU, Pred);
    1067        1833 :     if (isNewLoad)
    1068        1833 :       AddPred(LoadSU, Pred);
    1069             :   }
    1070        7834 :   for (const SDep &Pred : NodePreds) {
    1071         623 :     RemovePred(SU, Pred);
    1072         623 :     AddPred(NewSU, Pred);
    1073             :   }
    1074       32932 :   for (SDep D : NodeSuccs) {
    1075             :     SUnit *SuccDep = D.getSUnit();
    1076             :     D.setSUnit(SU);
    1077       13172 :     RemovePred(SuccDep, D);
    1078             :     D.setSUnit(NewSU);
    1079       13172 :     AddPred(SuccDep, D);
    1080             :     // Balance register pressure.
    1081       13172 :     if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
    1082       13172 :         !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
    1083           0 :       --NewSU->NumRegDefsLeft;
    1084             :   }
    1085       20106 :   for (SDep D : ChainSuccs) {
    1086             :     SUnit *SuccDep = D.getSUnit();
    1087             :     D.setSUnit(SU);
    1088        6759 :     RemovePred(SuccDep, D);
    1089        6759 :     if (isNewLoad) {
    1090             :       D.setSUnit(LoadSU);
    1091        6759 :       AddPred(SuccDep, D);
    1092             :     }
    1093             :   }
    1094             : 
    1095             :   // Add a data dependency to reflect that NewSU reads the value defined
    1096             :   // by LoadSU.
    1097             :   SDep D(LoadSU, SDep::Data, 0);
    1098        6588 :   D.setLatency(LoadSU->Latency);
    1099        6588 :   AddPred(NewSU, D);
    1100             : 
    1101        6588 :   if (isNewLoad)
    1102        6588 :     AvailableQueue->addNode(LoadSU);
    1103        6588 :   AvailableQueue->addNode(NewSU);
    1104             : 
    1105             :   ++NumUnfolds;
    1106             : 
    1107        6588 :   if (NewSU->NumSuccsLeft == 0)
    1108          45 :     NewSU->isAvailable = true;
    1109             : 
    1110             :   return NewSU;
    1111             : }
    1112             : 
    1113             : /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
    1114             : /// successors to the newly created node.
    1115       20605 : SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    1116       20605 :   SDNode *N = SU->getNode();
    1117       20605 :   if (!N)
    1118             :     return nullptr;
    1119             : 
    1120             :   LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
    1121             :   LLVM_DEBUG(SU->dump(this));
    1122             : 
    1123         106 :   if (N->getGluedNode() &&
    1124         106 :       !TII->canCopyGluedNodeDuringSchedule(N)) {
    1125             :     LLVM_DEBUG(
    1126             :         dbgs()
    1127             :         << "Giving up because it has incoming glue and the target does not "
    1128             :            "want to copy it\n");
    1129             :     return nullptr;
    1130             :   }
    1131             : 
    1132             :   SUnit *NewSU;
    1133             :   bool TryUnfold = false;
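                      :   // A chain result (MVT::Other) typically indicates a folded memory
                      :   // operand; in that case try unfolding before resorting to duplication.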
    1134      108494 :   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    1135             :     MVT VT = N->getSimpleValueType(i);
    1136       33746 :     if (VT == MVT::Glue) {
    1137             :       LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
    1138             :       return nullptr;
    1139       33746 :     } else if (VT == MVT::Other)
    1140             :       TryUnfold = true;
    1141             :   }
    1142       94507 :   for (const SDValue &Op : N->op_values()) {
    1143       74006 :     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    1144       74006 :     if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
    1145             :       LLVM_DEBUG(
     1146             :           dbgs() << "Giving up because one of the operands is glue and "
    1147             :                     "the target does not want to copy it\n");
    1148             :       return nullptr;
    1149             :     }
    1150             :   }
    1151             : 
     1152             :   // If possible, unfold the instruction.
    1153       20501 :   if (TryUnfold) {
    1154        6604 :     SUnit *UnfoldSU = TryUnfoldSU(SU);
    1155        6604 :     if (!UnfoldSU)
    1156             :       return nullptr;
    1157             :     SU = UnfoldSU;
    1158             :     N = SU->getNode();
     1159             :     // If this can be scheduled, don't bother duplicating; just return it.
    1160        6589 :     if (SU->NumSuccsLeft == 0)
    1161             :       return SU;
    1162             :   }
    1163             : 
    1164             :   LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
    1165       20441 :   NewSU = CreateClone(SU);
    1166             : 
    1167             :   // New SUnit has the exact same predecessors.
    1168       69457 :   for (SDep &Pred : SU->Preds)
    1169             :     if (!Pred.isArtificial())
    1170       24508 :       AddPred(NewSU, Pred);
    1171             : 
     1172             :   // Only copy scheduled successors. Cut them from the old node's successor
    1173             :   // list and move them over.
    1174             :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1175      103167 :   for (SDep &Succ : SU->Succs) {
    1176           0 :     if (Succ.isArtificial())
    1177             :       continue;
    1178             :     SUnit *SuccSU = Succ.getSUnit();
    1179       41363 :     if (SuccSU->isScheduled) {
    1180       20642 :       SDep D = Succ;
    1181             :       D.setSUnit(NewSU);
    1182       20642 :       AddPred(SuccSU, D);
    1183             :       D.setSUnit(SU);
    1184       20642 :       DelDeps.push_back(std::make_pair(SuccSU, D));
    1185             :     }
    1186             :   }
    1187       61725 :   for (auto &DelDep : DelDeps)
    1188       20642 :     RemovePred(DelDep.first, DelDep.second);
    1189             : 
    1190       20441 :   AvailableQueue->updateNode(SU);
    1191       20441 :   AvailableQueue->addNode(NewSU);
    1192             : 
    1193             :   ++NumDups;
    1194             :   return NewSU;
    1195             : }
    1196             : 
    1197             : /// InsertCopiesAndMoveSuccs - Insert register copies and move all
    1198             : /// scheduled successors of the given SUnit to the last copy.
    1199         119 : void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
    1200             :                                               const TargetRegisterClass *DestRC,
    1201             :                                               const TargetRegisterClass *SrcRC,
    1202             :                                               SmallVectorImpl<SUnit*> &Copies) {
    1203         119 :   SUnit *CopyFromSU = CreateNewSUnit(nullptr);
    1204         119 :   CopyFromSU->CopySrcRC = SrcRC;
    1205         119 :   CopyFromSU->CopyDstRC = DestRC;
    1206             : 
    1207         119 :   SUnit *CopyToSU = CreateNewSUnit(nullptr);
    1208         119 :   CopyToSU->CopySrcRC = DestRC;
    1209         119 :   CopyToSU->CopyDstRC = SrcRC;
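                      :   // CopyFromSU copies the physreg value into DestRC; CopyToSU copies it
                      :   // back into SrcRC, and the scheduled successors are reattached to
                      :   // CopyToSU below.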
    1210             : 
     1211             :   // Only copy scheduled successors. Cut them from the old node's successor
    1212             :   // list and move them over.
    1213             :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1214        1167 :   for (SDep &Succ : SU->Succs) {
    1215           0 :     if (Succ.isArtificial())
    1216             :       continue;
    1217             :     SUnit *SuccSU = Succ.getSUnit();
    1218         524 :     if (SuccSU->isScheduled) {
    1219         283 :       SDep D = Succ;
    1220         283 :       D.setSUnit(CopyToSU);
    1221         283 :       AddPred(SuccSU, D);
    1222         283 :       DelDeps.push_back(std::make_pair(SuccSU, Succ));
    1223             :     }
    1224             :     else {
    1225             :       // Avoid scheduling the def-side copy before other successors. Otherwise
    1226             :       // we could introduce another physreg interference on the copy and
    1227             :       // continue inserting copies indefinitely.
    1228         482 :       AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    1229             :     }
    1230             :   }
    1231         685 :   for (auto &DelDep : DelDeps)
    1232         283 :     RemovePred(DelDep.first, DelDep.second);
    1233             : 
    1234             :   SDep FromDep(SU, SDep::Data, Reg);
    1235         119 :   FromDep.setLatency(SU->Latency);
    1236         119 :   AddPred(CopyFromSU, FromDep);
    1237         119 :   SDep ToDep(CopyFromSU, SDep::Data, 0);
    1238         119 :   ToDep.setLatency(CopyFromSU->Latency);
    1239         119 :   AddPred(CopyToSU, ToDep);
    1240             : 
    1241         119 :   AvailableQueue->updateNode(SU);
    1242         119 :   AvailableQueue->addNode(CopyFromSU);
    1243         119 :   AvailableQueue->addNode(CopyToSU);
    1244         119 :   Copies.push_back(CopyFromSU);
    1245         119 :   Copies.push_back(CopyToSU);
    1246             : 
    1247             :   ++NumPRCopies;
    1248         119 : }
    1249             : 
    1250             : /// getPhysicalRegisterVT - Returns the ValueType of the physical register
    1251             : /// definition of the specified node.
    1252             : /// FIXME: Move to SelectionDAG?
    1253       20605 : static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
    1254             :                                  const TargetInstrInfo *TII) {
    1255             :   unsigned NumRes;
    1256       41210 :   if (N->getOpcode() == ISD::CopyFromReg) {
    1257             :     // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    1258             :     NumRes = 1;
    1259             :   } else {
    1260       20551 :     const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    1261             :     assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    1262       20551 :     NumRes = MCID.getNumDefs();
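                      :     // Reg's value index follows the explicit defs and any implicit defs
                      :     // that precede it in the implicit-def list.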
    1263       20553 :     for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    1264       20553 :       if (Reg == *ImpDef)
    1265             :         break;
    1266           2 :       ++NumRes;
    1267             :     }
    1268             :   }
    1269       20605 :   return N->getSimpleValueType(NumRes);
    1270             : }
    1271             : 
     1272             : /// CheckForLiveRegDef - Update the live register vector and LRegs if the
    1273             : /// specified register def of the specified SUnit clobbers any "live" registers.
    1274      667819 : static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
    1275             :                                SUnit **LiveRegDefs,
    1276             :                                SmallSet<unsigned, 4> &RegAdded,
    1277             :                                SmallVectorImpl<unsigned> &LRegs,
    1278             :                                const TargetRegisterInfo *TRI) {
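                      :   // Walk Reg and all of its aliases; the third argument makes the
                      :   // iterator include Reg itself.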
    1279     5912870 :   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
    1280             : 
     1281             :     // Check if this alias of Reg is live.
    1282     2288616 :     if (!LiveRegDefs[*AliasI]) continue;
    1283             : 
    1284             :     // Allow multiple uses of the same def.
    1285      163080 :     if (LiveRegDefs[*AliasI] == SU) continue;
    1286             : 
    1287             :     // Add Reg to the set of interfering live regs.
    1288       44166 :     if (RegAdded.insert(*AliasI).second) {
    1289       58776 :       LRegs.push_back(*AliasI);
    1290             :     }
    1291             :   }
    1292      667819 : }
    1293             : 
    1294             : /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
    1295             : /// by RegMask, and add them to LRegs.
    1296        1077 : static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
    1297             :                                      ArrayRef<SUnit*> LiveRegDefs,
    1298             :                                      SmallSet<unsigned, 4> &RegAdded,
    1299             :                                      SmallVectorImpl<unsigned> &LRegs) {
    1300             :   // Look at all live registers. Skip Reg0 and the special CallResource.
    1301      310131 :   for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    1302      618108 :     if (!LiveRegDefs[i]) continue;
    1303         185 :     if (LiveRegDefs[i] == SU) continue;
    1304         185 :     if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    1305         185 :     if (RegAdded.insert(i).second)
    1306           4 :       LRegs.push_back(i);
    1307             :   }
    1308        1077 : }
    1309             : 
    1310             : /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
    1311             : static const uint32_t *getNodeRegMask(const SDNode *N) {
    1312     3743704 :   for (const SDValue &Op : N->op_values())
    1313     3087471 :     if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
    1314        1081 :       return RegOp->getRegMask();
    1315             :   return nullptr;
    1316             : }
    1317             : 
    1318             : /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
    1319             : /// scheduling of the given node to satisfy live physical register dependencies.
     1320             : /// If the specified node is the last one that's available to schedule, do
    1321             : /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
    1322     4031448 : bool ScheduleDAGRRList::
    1323             : DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
    1324     4031448 :   if (NumLiveRegs == 0)
    1325             :     return false;
    1326             : 
    1327      788465 :   SmallSet<unsigned, 4> RegAdded;
    1328             :   // If this node would clobber any "live" register, then it's not ready.
    1329             :   //
    1330             :   // If SU is the currently live definition of the same register that it uses,
    1331             :   // then we are free to schedule it.
    1332     2351459 :   for (SDep &Pred : SU->Preds) {
    1333       42644 :     if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
    1334       38480 :       CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
    1335             :                          RegAdded, LRegs, TRI);
    1336             :   }
    1337             : 
    1338      788465 :   for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    1339     1632962 :     if (Node->getOpcode() == ISD::INLINEASM) {
    1340             :       // Inline asm can clobber physical defs.
    1341           3 :       unsigned NumOps = Node->getNumOperands();
    1342           6 :       if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
    1343             :         --NumOps;  // Ignore the glue operand.
    1344             : 
    1345          11 :       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
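                      :         // Each operand group begins with a flag word that encodes the
                      :         // operand kind and the number of registers that follow it.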
    1346             :         unsigned Flags =
    1347          24 :           cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    1348             :         unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    1349             : 
    1350           8 :         ++i; // Skip the ID value.
    1351           5 :         if (InlineAsm::isRegDefKind(Flags) ||
    1352          13 :             InlineAsm::isRegDefEarlyClobberKind(Flags) ||
    1353             :             InlineAsm::isClobberKind(Flags)) {
    1354             :           // Check for def of register or earlyclobber register.
    1355          18 :           for (; NumVals; --NumVals, ++i) {
    1356          12 :             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
    1357           6 :             if (TargetRegisterInfo::isPhysicalRegister(Reg))
    1358           6 :               CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1359             :           }
    1360             :         } else
    1361           2 :           i += NumVals;
    1362             :       }
    1363           3 :       continue;
    1364             :     }
    1365             : 
    1366      816478 :     if (!Node->isMachineOpcode())
    1367      159172 :       continue;
    1368             :     // If we're in the middle of scheduling a call, don't begin scheduling
    1369             :     // another call. Also, don't allow any physical registers to be live across
    1370             :     // the call.
    1371      657306 :     if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
    1372             :       // Check the special calling-sequence resource.
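                      :       // The calling sequence is modeled as an extra resource indexed one
                      :       // past the last physical register.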
    1373        1076 :       unsigned CallResource = TRI->getNumRegs();
    1374        2152 :       if (LiveRegDefs[CallResource]) {
    1375         890 :         SDNode *Gen = LiveRegGens[CallResource]->getNode();
    1376             :         while (SDNode *Glued = Gen->getGluedNode())
    1377             :           Gen = Glued;
    1378        1779 :         if (!IsChainDependent(Gen, Node, 0, TII) &&
    1379         889 :             RegAdded.insert(CallResource).second)
    1380         889 :           LRegs.push_back(CallResource);
    1381             :       }
    1382             :     }
    1383      657306 :     if (const uint32_t *RegMask = getNodeRegMask(Node))
    1384        2154 :       CheckForLiveRegDefMasked(SU, RegMask,
    1385        1077 :                                makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
    1386             :                                RegAdded, LRegs);
    1387             : 
    1388     1314612 :     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    1389     1314612 :     if (MCID.hasOptionalDef()) {
    1390             :       // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
    1391             :       // This operand can be either a def of CPSR, if the S bit is set; or a use
    1392             :       // of %noreg.  When the OptionalDef is set to a valid register, we need to
    1393             :       // handle it in the same way as an ImplicitDef.
    1394       18093 :       for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
    1395        3793 :         if (MCID.OpInfo[i].isOptionalDef()) {
    1396         872 :           const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
    1397         436 :           unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
    1398         872 :           CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1399             :         }
    1400             :     }
    1401      657306 :     if (!MCID.ImplicitDefs)
    1402      329505 :       continue;
    1403     1624081 :     for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
    1404     1296280 :       CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1405             :   }
    1406             : 
    1407      788465 :   return !LRegs.empty();
    1408             : }
    1409             : 
    1410      310193 : void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
    1411             :   // Add the nodes that aren't ready back onto the available list.
    1412      310193 :   for (unsigned i = Interferences.size(); i > 0; --i) {
    1413       60562 :     SUnit *SU = Interferences[i-1];
    1414       30281 :     LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    1415       30281 :     if (Reg) {
    1416             :       SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
    1417       30281 :       if (!is_contained(LRegs, Reg))
    1418           2 :         continue;
    1419             :     }
    1420       30279 :     SU->isPending = false;
    1421             :     // The interfering node may no longer be available due to backtracking.
    1422             :     // Furthermore, it may have been made available again, in which case it is
    1423             :     // now already in the AvailableQueue.
    1424       30279 :     if (SU->isAvailable && !SU->NodeQueueId) {
    1425             :       LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
    1426        8553 :       AvailableQueue->push(SU);
    1427             :     }
    1428       60558 :     if (i < Interferences.size())
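                      :     // Erase this entry in O(1) by overwriting it with the last element
                      :     // before popping the back.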
    1429           0 :       Interferences[i-1] = Interferences.back();
    1430             :     Interferences.pop_back();
    1431             :     LRegsMap.erase(LRegsPos);
    1432             :   }
    1433      310193 : }
    1434             : 
    1435             : /// Return a node that can be scheduled in this cycle. Requirements:
    1436             : /// (1) Ready: latency has been satisfied
    1437             : /// (2) No Hazards: resources are available
    1438             : /// (3) No Interferences: may unschedule to break register interferences.
    1439     4021772 : SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
    1440     4021772 :   SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
    1441     4022631 :   auto FindAvailableNode = [&]() {
    1442     4143753 :     while (CurSU) {
    1443             :       SmallVector<unsigned, 4> LRegs;
    1444     4092008 :       if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
    1445             :         break;
    1446             :       LLVM_DEBUG(dbgs() << "    Interfering reg ";
    1447             :                  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
    1448             :                  else dbgs() << printReg(LRegs[0], TRI);
    1449             :                  dbgs() << " SU #" << CurSU->NodeNum << '\n');
    1450             :       std::pair<LRegsMapT::iterator, bool> LRegsPair =
    1451       90843 :         LRegsMap.insert(std::make_pair(CurSU, LRegs));
    1452       30281 :       if (LRegsPair.second) {
    1453       30279 :         CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    1454       30279 :         Interferences.push_back(CurSU);
    1455             :       }
    1456             :       else {
    1457             :         assert(CurSU->isPending && "Interferences are pending");
    1458             :         // Update the interference with current live regs.
    1459           2 :         LRegsPair.first->second = LRegs;
    1460             :       }
    1461       60562 :       CurSU = AvailableQueue->pop();
    1462             :     }
    1463     8044403 :   };
    1464     4021772 :   FindAvailableNode();
    1465     4021772 :   if (CurSU)
    1466             :     return CurSU;
    1467             : 
    1468             :   // All candidates are delayed due to live physical reg dependencies.
    1469             :   // Try backtracking, code duplication, or inserting cross class copies
    1470             :   // to resolve it.
    1471       69228 :   for (SUnit *TrySU : Interferences) {
    1472       24741 :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1473             : 
    1474             :     // Try unscheduling up to the point where it's safe to schedule
    1475             :     // this node.
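                      :     // Backtrack to the interfering live-range generator with the lowest
                      :     // height, the earliest point in the schedule we may need to undo.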
    1476             :     SUnit *BtSU = nullptr;
    1477             :     unsigned LiveCycle = std::numeric_limits<unsigned>::max();
    1478       74223 :     for (unsigned Reg : LRegs) {
    1479       74223 :       if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
    1480       24741 :         BtSU = LiveRegGens[Reg];
    1481             :         LiveCycle = BtSU->getHeight();
    1482             :       }
    1483             :     }
    1484       49482 :     if (!WillCreateCycle(TrySU, BtSU))  {
    1485             :       // BacktrackBottomUp mutates Interferences!
    1486         859 :       BacktrackBottomUp(TrySU, BtSU);
    1487             : 
    1488             :       // Force the current node to be scheduled before the node that
    1489             :       // requires the physical reg dep.
    1490         859 :       if (BtSU->isAvailable) {
    1491         859 :         BtSU->isAvailable = false;
    1492         859 :         if (!BtSU->isPending)
    1493         859 :           AvailableQueue->remove(BtSU);
    1494             :       }
    1495             :       LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
    1496             :                         << ") to SU(" << TrySU->NodeNum << ")\n");
    1497         859 :       AddPred(TrySU, SDep(BtSU, SDep::Artificial));
    1498             : 
     1499             :       // If one or more successors have been unscheduled, then the current
    1500             :       // node is no longer available.
    1501         859 :       if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
    1502             :         LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
    1503         263 :         CurSU = AvailableQueue->pop();
    1504             :       } else {
    1505             :         LLVM_DEBUG(dbgs() << "TrySU available\n");
    1506             :         // Available and in AvailableQueue
    1507         596 :         AvailableQueue->remove(TrySU);
    1508         596 :         CurSU = TrySU;
    1509             :       }
    1510         859 :       FindAvailableNode();
    1511             :       // Interferences has been mutated. We must break.
    1512         859 :       break;
    1513             :     }
    1514             :   }
    1515             : 
    1516       21464 :   if (!CurSU) {
     1517             :     // Can't backtrack. If it's too expensive to copy the value, then try to
     1518             :     // duplicate the nodes that produce these "too expensive to copy"
     1519             :     // values to break the dependency. If even that doesn't work,
     1520             :     // insert cross-class copies.
    1521             :     // If it's not too expensive, i.e. cost != -1, issue copies.
    1522       20605 :     SUnit *TrySU = Interferences[0];
    1523       20605 :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1524             :     assert(LRegs.size() == 1 && "Can't handle this yet!");
    1525       20605 :     unsigned Reg = LRegs[0];
    1526       41210 :     SUnit *LRDef = LiveRegDefs[Reg];
    1527       20605 :     MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    1528             :     const TargetRegisterClass *RC =
    1529       20605 :       TRI->getMinimalPhysRegClass(Reg, VT);
    1530       20605 :     const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
    1531             : 
     1532             :     // If the cross-copy register class is the same as RC, then it must be
     1533             :     // possible to copy the value directly. Do not try to duplicate the def.
     1534             :     // If the cross-copy register class is not the same as RC, then it's
     1535             :     // possible to copy the value, but it requires cross-register-class copies
     1536             :     // and is expensive.
     1537             :     // If the cross-copy register class is null, then it's not possible to
     1538             :     // copy the value at all.
    1539             :     SUnit *NewDef = nullptr;
    1540       20605 :     if (DestRC != RC) {
    1541       20605 :       NewDef = CopyAndMoveSuccessors(LRDef);
    1542       20605 :       if (!DestRC && !NewDef)
    1543           0 :         report_fatal_error("Can't handle live physical register dependency!");
    1544             :     }
    1545       20605 :     if (!NewDef) {
    1546             :       // Issue copies, these can be expensive cross register class copies.
    1547             :       SmallVector<SUnit*, 2> Copies;
    1548         119 :       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
    1549             :       LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
    1550             :                         << " to SU #" << Copies.front()->NodeNum << "\n");
    1551         238 :       AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
    1552         119 :       NewDef = Copies.back();
    1553             :     }
    1554             : 
    1555             :     LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
    1556             :                       << " to SU #" << TrySU->NodeNum << "\n");
    1557       20605 :     LiveRegDefs[Reg] = NewDef;
    1558       41210 :     AddPred(NewDef, SDep(TrySU, SDep::Artificial));
    1559       20605 :     TrySU->isAvailable = false;
    1560       20605 :     CurSU = NewDef;
    1561             :   }
    1562             :   assert(CurSU && "Unable to resolve live physical register dependencies!");
    1563       21464 :   return CurSU;
    1564             : }
    1565             : 
    1566             : /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
    1567             : /// schedulers.
    1568      363255 : void ScheduleDAGRRList::ListScheduleBottomUp() {
    1569             :   // Release any predecessors of the special Exit node.
    1570      363255 :   ReleasePredecessors(&ExitSU);
    1571             : 
    1572             :   // Add root to Available queue.
    1573      363255 :   if (!SUnits.empty()) {
    1574      360373 :     SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    1575             :     assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    1576      360373 :     RootSU->isAvailable = true;
    1577      360373 :     AvailableQueue->push(RootSU);
    1578             :   }
    1579             : 
    1580             :   // While Available queue is not empty, grab the node with the highest
     1581             :   // priority. If it is not ready, put it back. Schedule the node.
    1582      726510 :   Sequence.reserve(SUnits.size());
    1583     4385027 :   while (!AvailableQueue->empty() || !Interferences.empty()) {
    1584             :     LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
    1585             :                AvailableQueue->dump(this));
    1586             : 
    1587             :     // Pick the best node to schedule taking all constraints into
    1588             :     // consideration.
    1589     4021772 :     SUnit *SU = PickNodeToScheduleBottomUp();
    1590             : 
    1591     4021772 :     AdvancePastStalls(SU);
    1592             : 
    1593     4021772 :     ScheduleNodeBottomUp(SU);
    1594             : 
    1595     4394177 :     while (AvailableQueue->empty() && !PendingQueue.empty()) {
    1596             :       // Advance the cycle to free resources. Skip ahead to the next ready SU.
    1597             :       assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
    1598             :              "MinAvailableCycle uninitialized");
    1599           0 :       AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    1600             :     }
    1601             :   }
    1602             : 
     1603             :   // Reverse the order since this is a bottom-up schedule.
    1604             :   std::reverse(Sequence.begin(), Sequence.end());
    1605             : 
    1606             : #ifndef NDEBUG
    1607             :   VerifyScheduledSequence(/*isBottomUp=*/true);
    1608             : #endif
    1609      363255 : }
    1610             : 
    1611             : namespace {
    1612             : 
    1613             : class RegReductionPQBase;
    1614             : 
    1615             : struct queue_sort {
    1616             :   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
    1617             : };
    1618             : 
    1619             : #ifndef NDEBUG
    1620             : template<class SF>
    1621             : struct reverse_sort : public queue_sort {
    1622             :   SF &SortFunc;
    1623             : 
    1624             :   reverse_sort(SF &sf) : SortFunc(sf) {}
    1625             : 
    1626             :   bool operator()(SUnit* left, SUnit* right) const {
    1627             :     // reverse left/right rather than simply !SortFunc(left, right)
    1628             :     // to expose different paths in the comparison logic.
    1629             :     return SortFunc(right, left);
    1630             :   }
    1631             : };
    1632             : #endif // NDEBUG
    1633             : 
     1634             : /// bu_ls_rr_sort - Priority function for the bottom-up register pressure
     1635             : /// reduction scheduler.
    1636             : struct bu_ls_rr_sort : public queue_sort {
    1637             :   enum {
    1638             :     IsBottomUp = true,
    1639             :     HasReadyFilter = false
    1640             :   };
    1641             : 
    1642             :   RegReductionPQBase *SPQ;
    1643             : 
    1644        7644 :   bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1645             : 
    1646             :   bool operator()(SUnit* left, SUnit* right) const;
    1647             : };
    1648             : 
    1649             : // src_ls_rr_sort - Priority function for source order scheduler.
    1650             : struct src_ls_rr_sort : public queue_sort {
    1651             :   enum {
    1652             :     IsBottomUp = true,
    1653             :     HasReadyFilter = false
    1654             :   };
    1655             : 
    1656             :   RegReductionPQBase *SPQ;
    1657             : 
    1658      326849 :   src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1659             : 
    1660             :   bool operator()(SUnit* left, SUnit* right) const;
    1661             : };
    1662             : 
    1663             : // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
    1664             : struct hybrid_ls_rr_sort : public queue_sort {
    1665             :   enum {
    1666             :     IsBottomUp = true,
    1667             :     HasReadyFilter = false
    1668             :   };
    1669             : 
    1670             :   RegReductionPQBase *SPQ;
    1671             : 
    1672       12952 :   hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1673             : 
    1674             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1675             : 
    1676             :   bool operator()(SUnit* left, SUnit* right) const;
    1677             : };
    1678             : 
    1679             : // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
    1680             : // scheduler.
    1681             : struct ilp_ls_rr_sort : public queue_sort {
    1682             :   enum {
    1683             :     IsBottomUp = true,
    1684             :     HasReadyFilter = false
    1685             :   };
    1686             : 
    1687             :   RegReductionPQBase *SPQ;
    1688             : 
    1689       15810 :   ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1690             : 
    1691             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1692             : 
    1693             :   bool operator()(SUnit* left, SUnit* right) const;
    1694             : };
    1695             : 
    1696      726510 : class RegReductionPQBase : public SchedulingPriorityQueue {
    1697             : protected:
    1698             :   std::vector<SUnit *> Queue;
    1699             :   unsigned CurQueueId = 0;
    1700             :   bool TracksRegPressure;
    1701             :   bool SrcOrder;
    1702             : 
    1703             :   // SUnits - The SUnits for the current graph.
    1704             :   std::vector<SUnit> *SUnits;
    1705             : 
    1706             :   MachineFunction &MF;
    1707             :   const TargetInstrInfo *TII;
    1708             :   const TargetRegisterInfo *TRI;
    1709             :   const TargetLowering *TLI;
    1710             :   ScheduleDAGRRList *scheduleDAG = nullptr;
    1711             : 
    1712             :   // SethiUllmanNumbers - The SethiUllman number for each node.
    1713             :   std::vector<unsigned> SethiUllmanNumbers;
    1714             : 
    1715             :   /// RegPressure - Tracking current reg pressure per register class.
    1716             :   std::vector<unsigned> RegPressure;
    1717             : 
    1718             :   /// RegLimit - Tracking the number of allocatable registers per register
    1719             :   /// class.
    1720             :   std::vector<unsigned> RegLimit;
    1721             : 
    1722             : public:
    1723      363255 :   RegReductionPQBase(MachineFunction &mf,
    1724             :                      bool hasReadyFilter,
    1725             :                      bool tracksrp,
    1726             :                      bool srcorder,
    1727             :                      const TargetInstrInfo *tii,
    1728             :                      const TargetRegisterInfo *tri,
    1729             :                      const TargetLowering *tli)
    1730      363255 :     : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
    1731     1453020 :       SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    1732      363255 :     if (TracksRegPressure) {
    1733             :       unsigned NumRC = TRI->getNumRegClasses();
    1734       28762 :       RegLimit.resize(NumRC);
    1735       28762 :       RegPressure.resize(NumRC);
    1736             :       std::fill(RegLimit.begin(), RegLimit.end(), 0);
    1737             :       std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1738     4654216 :       for (const TargetRegisterClass *RC : TRI->regclasses())
    1739     6895038 :         RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    1740             :     }
    1741      363255 :   }
    1742             : 
    1743             :   void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    1744      363255 :     scheduleDAG = scheduleDag;
    1745             :   }
    1746             : 
    1747             :   ScheduleHazardRecognizer* getHazardRec() {
    1748     6410804 :     return scheduleDAG->getHazardRec();
    1749             :   }
    1750             : 
    1751             :   void initNodes(std::vector<SUnit> &sunits) override;
    1752             : 
    1753             :   void addNode(const SUnit *SU) override;
    1754             : 
    1755             :   void updateNode(const SUnit *SU) override;
    1756             : 
    1757      363255 :   void releaseState() override {
    1758      363255 :     SUnits = nullptr;
    1759             :     SethiUllmanNumbers.clear();
    1760             :     std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1761      363255 :   }
    1762             : 
    1763             :   unsigned getNodePriority(const SUnit *SU) const;
    1764             : 
    1765             :   unsigned getNodeOrdering(const SUnit *SU) const {
    1766    25339230 :     if (!SU->getNode()) return 0;
    1767             : 
    1768    25339095 :     return SU->getNode()->getIROrder();
    1769             :   }
    1770             : 
    1771    32843514 :   bool empty() const override { return Queue.empty(); }
    1772             : 
    1773     4034079 :   void push(SUnit *U) override {
    1774             :     assert(!U->NodeQueueId && "Node in the queue already");
    1775     4034079 :     U->NodeQueueId = ++CurQueueId;
    1776     4034079 :     Queue.push_back(U);
    1777     4034079 :   }
    1778             : 
    1779        3227 :   void remove(SUnit *SU) override {
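                      :     // Queue order is irrelevant (pop() scans the whole vector), so removal
                      :     // swaps SU with the back element and pops.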
    1780             :     assert(!Queue.empty() && "Queue is empty!");
    1781             :     assert(SU->NodeQueueId != 0 && "Not in queue!");
    1782             :     std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    1783        3227 :     if (I != std::prev(Queue.end()))
    1784             :       std::swap(*I, Queue.back());
    1785             :     Queue.pop_back();
    1786        3227 :     SU->NodeQueueId = 0;
    1787        3227 :   }
    1788             : 
    1789       13172 :   bool tracksRegPressure() const override { return TracksRegPressure; }
    1790             : 
    1791             :   void dumpRegPressure() const;
    1792             : 
    1793             :   bool HighRegPressure(const SUnit *SU) const;
    1794             : 
    1795             :   bool MayReduceRegPressure(SUnit *SU) const;
    1796             : 
    1797             :   int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
    1798             : 
    1799             :   void scheduledNode(SUnit *SU) override;
    1800             : 
    1801             :   void unscheduledNode(SUnit *SU) override;
    1802             : 
    1803             : protected:
    1804             :   bool canClobber(const SUnit *SU, const SUnit *Op);
    1805             :   void AddPseudoTwoAddrDeps();
    1806             :   void PrescheduleNodesWithMultipleUses();
    1807             :   void CalculateSethiUllmanNumbers();
    1808             : };
    1809             : 
    1810             : template<class SF>
    1811     4030852 : static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
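                      :   // Linear scan for the best candidate according to Picker, then remove it
                      :   // in O(1) by swapping it with the back of the vector.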
    1812             :   std::vector<SUnit *>::iterator Best = Q.begin();
    1813    17148790 :   for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    1814    13178864 :     if (Picker(*Best, *I))
    1815             :       Best = I;
    1816     4030852 :   SUnit *V = *Best;
    1817     4030852 :   if (Best != std::prev(Q.end()))
    1818             :     std::swap(*Best, Q.back());
    1819             :   Q.pop_back();
    1820     4030852 :   return V;
    1821             : }
    1822             : 
    1823             : template<class SF>
    1824             : SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
    1825             : #ifndef NDEBUG
    1826             :   if (DAG->StressSched) {
    1827             :     reverse_sort<SF> RPicker(Picker);
    1828             :     return popFromQueueImpl(Q, RPicker);
    1829             :   }
    1830             : #endif
    1831             :   (void)DAG;
    1832     4030852 :   return popFromQueueImpl(Q, Picker);
    1833             : }
    1834             : 
    1835             : //===----------------------------------------------------------------------===//
    1836             : //                RegReductionPriorityQueue Definition
    1837             : //===----------------------------------------------------------------------===//
    1838             : //
    1839             : // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
    1840             : // to reduce register pressure.
    1841             : //
    1842             : template<class SF>
    1843      363255 : class RegReductionPriorityQueue : public RegReductionPQBase {
    1844             :   SF Picker;
    1845             : 
    1846             : public:
    1847      363255 :   RegReductionPriorityQueue(MachineFunction &mf,
    1848             :                             bool tracksrp,
    1849             :                             bool srcorder,
    1850             :                             const TargetInstrInfo *tii,
    1851             :                             const TargetRegisterInfo *tri,
    1852             :                             const TargetLowering *tli)
    1853             :     : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
    1854             :                          tii, tri, tli),
    1855      363255 :       Picker(this) {}
    1856             : 
    1857           0 :   bool isBottomUp() const override { return SF::IsBottomUp; }
    1858             : 
    1859           0 :   bool isReady(SUnit *U) const override {
    1860           0 :     return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
    1861             :   }
    1862             : 
    1863     4040284 :   SUnit *pop() override {
    1864     4040284 :     if (Queue.empty()) return nullptr;
    1865             : 
    1866     4030852 :     SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    1867     4030852 :     V->NodeQueueId = 0;
    1868     4030852 :     return V;
    1869             :   }
    1870             : 
    1871             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    1872             :   LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    1873             :     // Emulate pop() without clobbering NodeQueueIds.
    1874             :     std::vector<SUnit *> DumpQueue = Queue;
    1875             :     SF DumpPicker = Picker;
    1876             :     while (!DumpQueue.empty()) {
    1877             :       SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
    1878             :       dbgs() << "Height " << SU->getHeight() << ": ";
    1879             :       SU->dump(DAG);
    1880             :     }
    1881             :   }
    1882             : #endif
    1883             : };
    1884             : 
    1885             : using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
    1886             : using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
    1887             : using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
    1888             : using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
    1889             : 
    1890             : } // end anonymous namespace
    1891             : 
    1892             : //===----------------------------------------------------------------------===//
    1893             : //           Static Node Priority for Register Pressure Reduction
    1894             : //===----------------------------------------------------------------------===//
    1895             : 
    1896             : // Check for special nodes that bypass scheduling heuristics.
    1897             : // Currently this pushes TokenFactor nodes down, but may be used for other
    1898             : // pseudo-ops as well.
    1899             : //
    1900             : // Return -1 to schedule right above left, 1 for left above right.
    1901             : // Return 0 if no bias exists.
    1902             : static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
     1903             : /// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for SU.
     1904             : /// A smaller number means a higher priority.
    1905    13117938 :   if (LSchedLow != RSchedLow)
    1906       73399 :     return LSchedLow < RSchedLow ? 1 : -1;
    1907             :   return 0;
    1908             : }
    1909             : 
    1910             : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
    1911             : /// Smaller number is the higher priority.
    1912             : static unsigned
    1913     4040416 : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
    1914     8080832 :   if (SUNumbers[SU->NodeNum] != 0)
    1915             :     return SUNumbers[SU->NodeNum];
    1916             : 
    1917             :   // Use WorkList to avoid stack overflow on excessively large IRs.
    1918             :   struct WorkState {
    1919     4040416 :     WorkState(const SUnit *SU) : SU(SU) {}
    1920             :     const SUnit *SU;
    1921             :     unsigned PredsProcessed = 0;
    1922             :   };
    1923             : 
    1924             :   SmallVector<WorkState, 16> WorkList;
    1925     1754151 :   WorkList.push_back(SU);
    1926     8080832 :   while (!WorkList.empty()) {
    1927             :     auto &Temp = WorkList.back();
    1928     6326681 :     auto *TempSU = Temp.SU;
    1929             :     bool AllPredsKnown = true;
    1930             :     // Try to find a non-evaluated pred and push it into the processing stack.
    1931    17744676 :     for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
    1932             :       auto &Pred = TempSU->Preds[P];
    1933     4831922 :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1934             :       SUnit *PredSU = Pred.getSUnit();
    1935     5906828 :       if (SUNumbers[PredSU->NodeNum] == 0) {
    1936             : #ifndef NDEBUG
    1937             :         // In debug mode, check that we don't have such element in the stack.
    1938             :         for (auto It : WorkList)
    1939             :           assert(It.SU != PredSU && "Trying to push an element twice?");
    1940             : #endif
    1941             :         // Next time start processing this one starting from the next pred.
    1942     2286265 :         Temp.PredsProcessed = P + 1;
    1943     2286265 :         WorkList.push_back(PredSU);
    1944             :         AllPredsKnown = false;
    1945             :         break;
    1946             :       }
    1947             :     }
    1948             : 
    1949     2286265 :     if (!AllPredsKnown)
    1950     2286265 :       continue;
    1951             : 
    1952             :     // Once all preds are known, we can calculate the answer for this one.
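                      :     // The number is the maximum over all data preds, plus one for each
                      :     // additional pred that ties at that maximum (a tie means one more value
                      :     // is live at the same time). E.g. preds numbered {2, 2} yield 2 + 1 = 3,
                      :     // while {3, 1} yield 3.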
    1953             :     unsigned SethiUllmanNumber = 0;
    1954             :     unsigned Extra = 0;
    1955    13704260 :     for (const SDep &Pred : TempSU->Preds) {
    1956     4831922 :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1957             :       SUnit *PredSU = Pred.getSUnit();
    1958     5906828 :       unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
    1959             :       assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
    1960     2953414 :       if (PredSethiUllman > SethiUllmanNumber) {
    1961             :         SethiUllmanNumber = PredSethiUllman;
    1962             :         Extra = 0;
    1963      812329 :       } else if (PredSethiUllman == SethiUllmanNumber)
    1964      667075 :         ++Extra;
    1965             :     }
    1966             : 
    1967     4040416 :     SethiUllmanNumber += Extra;
    1968     4040416 :     if (SethiUllmanNumber == 0)
    1969             :       SethiUllmanNumber = 1;
    1970     8080832 :     SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
    1971             :     WorkList.pop_back();
    1972             :   }
    1973             : 
    1974             :   assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
    1975     3508302 :   return SUNumbers[SU->NodeNum];
    1976             : }
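
A minimal, self-contained C++ sketch of the same iterative scheme may be
easier to follow; it uses a plain adjacency list and hypothetical names
(calcSethiUllman, Preds, Numbers) instead of LLVM's SUnit/SDep, and it omits
chain preds entirely:

    #include <algorithm>
    #include <vector>

    // Preds[N] lists the data predecessors of node N; Numbers is
    // zero-initialized with one slot per node. As above, a node's number is
    // the max over its preds plus the count of preds tying for that max,
    // with leaves clamped to 1.
    static unsigned calcSethiUllman(unsigned N,
                                    const std::vector<std::vector<unsigned>> &Preds,
                                    std::vector<unsigned> &Numbers) {
      if (Numbers[N] != 0)
        return Numbers[N];

      struct WorkState { unsigned Node; unsigned PredsProcessed = 0; };
      std::vector<WorkState> WorkList;
      WorkList.push_back({N});
      while (!WorkList.empty()) {
        WorkState &Top = WorkList.back();
        unsigned Cur = Top.Node;
        bool AllPredsKnown = true;
        // Push the first not-yet-evaluated pred and revisit Cur afterwards.
        for (unsigned P = Top.PredsProcessed; P < Preds[Cur].size(); ++P) {
          if (Numbers[Preds[Cur][P]] == 0) {
            Top.PredsProcessed = P + 1; // resume after this pred next time
            WorkList.push_back({Preds[Cur][P]});
            AllPredsKnown = false;
            break;
          }
        }
        if (!AllPredsKnown)
          continue;

        unsigned Number = 0, Extra = 0;
        for (unsigned Pred : Preds[Cur]) {
          if (Numbers[Pred] > Number) { Number = Numbers[Pred]; Extra = 0; }
          else if (Numbers[Pred] == Number) ++Extra;
        }
        Numbers[Cur] = std::max(Number + Extra, 1u);
        WorkList.pop_back();
      }
      return Numbers[N];
    }

For a binary node whose two leaf operands both evaluate to 1, this yields
1 + 1 = 2, the classic Sethi-Ullman register count for an expression tree.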
    1977             : 
    1978             : /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
    1979             : /// scheduling units.
    1980      363255 : void RegReductionPQBase::CalculateSethiUllmanNumbers() {
    1981      726510 :   SethiUllmanNumbers.assign(SUnits->size(), 0);
    1982             : 
    1983     4712511 :   for (const SUnit &SU : *SUnits)
    1984     3986001 :     CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
    1985      363255 : }
    1986             : 
    1987       33855 : void RegReductionPQBase::addNode(const SUnit *SU) {
    1988       67710 :   unsigned SUSize = SethiUllmanNumbers.size();
    1989       67710 :   if (SUnits->size() > SUSize)
    1990       20447 :     SethiUllmanNumbers.resize(SUSize*2, 0);
    1991       33855 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    1992       33855 : }
    1993             : 
    1994       20560 : void RegReductionPQBase::updateNode(const SUnit *SU) {
    1995       41120 :   SethiUllmanNumbers[SU->NodeNum] = 0;
    1996       20560 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    1997       20560 : }
    1998             : 
    1999             : // A lower priority number means schedule further down (closer to the end of
    2000             : // the block); bottom-up scheduling thus schedules lower priority SUs first.
    2001    14987886 : unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
    2002             :   assert(SU->NodeNum < SethiUllmanNumbers.size());
    2003    14987886 :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2004    14987886 :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2005             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2006             :     // avoid spilling.
    2007             :     return 0;
    2008    29037148 :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2009    29037148 :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2010             :       Opc == TargetOpcode::INSERT_SUBREG)
    2011             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2012             :     // close to their uses to facilitate coalescing.
    2013             :     return 0;
    2014    14518574 :   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    2015             :     // If SU does not have a register use, i.e. it doesn't produce a value
    2016             :     // that would be consumed (e.g. store), then it terminates a chain of
    2017             :     // computation.  Give it a large Sethi-Ullman number so that it will be
    2018             :     // scheduled right before its predecessors and won't lengthen
    2019             :     // their live ranges.
    2020             :     return 0xffff;
    2021    11753033 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2022             :     // If SU does not have a register def, schedule it close to its uses
    2023             :     // because it does not lengthen any live ranges.
    2024             :     return 0;
    2025             : #if 1
    2026    19325894 :   return SethiUllmanNumbers[SU->NodeNum];
    2027             : #else
    2028             :   unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
    2029             :   if (SU->isCallOp) {
    2030             :     // FIXME: This assumes all of the defs are used as call operands.
    2031             :     int NP = (int)Priority - SU->getNode()->getNumValues();
    2032             :     return (NP > 0) ? NP : 0;
    2033             :   }
    2034             :   return Priority;
    2035             : #endif
    2036             : }
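
Condensed into a short sketch (with a hypothetical MiniSU type; the real code
inspects SDNode opcodes), the decision ladder above is:

    struct MiniSU { bool KeepNearUses; unsigned NumPreds, NumSuccs, SethiUllman; };

    unsigned miniPriority(const MiniSU &SU) {
      if (SU.KeepNearUses)                      // TokenFactor, CopyToReg and
        return 0;                               // subreg ops: stay next to uses
      if (SU.NumSuccs == 0 && SU.NumPreds != 0) // chain terminator (e.g. store):
        return 0xffff;                          // land right before its preds
      if (SU.NumPreds == 0 && SU.NumSuccs != 0) // consumes no registers, so it
        return 0;                               // lengthens no live ranges
      return SU.SethiUllman;                    // otherwise the computed number
    }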
    2037             : 
    2038             : //===----------------------------------------------------------------------===//
    2039             : //                     Register Pressure Tracking
    2040             : //===----------------------------------------------------------------------===//
    2041             : 
    2042             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2043             : LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
    2044             :   for (const TargetRegisterClass *RC : TRI->regclasses()) {
    2045             :     unsigned Id = RC->getID();
    2046             :     unsigned RP = RegPressure[Id];
    2047             :     if (!RP) continue;
    2048             :     LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
    2049             :                       << RegLimit[Id] << '\n');
    2050             :   }
    2051             : }
    2052             : #endif
    2053             : 
    2054      334044 : bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
    2055      334044 :   if (!TLI)
    2056             :     return false;
    2057             : 
    2058     1046826 :   for (const SDep &Pred : SU->Preds) {
    2059      377051 :     if (Pred.isCtrl())
    2060       23568 :       continue;
    2061             :     SUnit *PredSU = Pred.getSUnit();
    2062             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2063             :     // to cover the number of registers defined (they are all live).
    2064      353483 :     if (PredSU->NumRegDefsLeft == 0) {
    2065      144701 :       continue;
    2066             :     }
    2067      422681 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2068      636580 :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2069             :       unsigned RCId, Cost;
    2070      234559 :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2071             : 
    2072      703677 :       if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
    2073       20660 :         return true;
    2074             :     }
    2075             :   }
    2076             :   return false;
    2077             : }
    2078             : 
    2079             : bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
    2080             :   const SDNode *N = SU->getNode();
    2081             : 
    2082             :   if (!N->isMachineOpcode() || !SU->NumSuccs)
    2083             :     return false;
    2084             : 
    2085             :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2086             :   for (unsigned i = 0; i != NumDefs; ++i) {
    2087             :     MVT VT = N->getSimpleValueType(i);
    2088             :     if (!N->hasAnyUseOfValue(i))
    2089             :       continue;
    2090             :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2091             :     if (RegPressure[RCId] >= RegLimit[RCId])
    2092             :       return true;
    2093             :   }
    2094             :   return false;
    2095             : }
    2096             : 
    2097             : // Compute the register pressure contribution of this instruction by counting
    2098             : // up for uses that are not yet live and down for defs. Only count register
    2099             : // classes that are already under high pressure. As a side effect, compute the
    2100             : // number of uses of registers that are already live.
    2101             : //
    2102             : // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
    2103             : // so could probably be factored.
    2104      291368 : int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
    2105      291368 :   LiveUses = 0;
    2106             :   int PDiff = 0;
    2107      907882 :   for (const SDep &Pred : SU->Preds) {
    2108      308257 :     if (Pred.isCtrl())
    2109       25732 :       continue;
    2110             :     SUnit *PredSU = Pred.getSUnit();
    2111             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2112             :     // to cover the number of registers defined (they are all live).
    2113      350271 :     if (PredSU->NumRegDefsLeft == 0) {
    2114       67746 :       if (PredSU->getNode()->isMachineOpcode())
    2115       55893 :         ++LiveUses;
    2116       67746 :       continue;
    2117             :     }
    2118      429669 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2119      644559 :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2120      214890 :       MVT VT = RegDefPos.GetValue();
    2121      214890 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2122      644670 :       if (RegPressure[RCId] >= RegLimit[RCId])
    2123       82519 :         ++PDiff;
    2124             :     }
    2125             :   }
    2126      291368 :   const SDNode *N = SU->getNode();
    2127             : 
    2128      291368 :   if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    2129             :     return PDiff;
    2130             : 
    2131      383709 :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2132      383049 :   for (unsigned i = 0; i != NumDefs; ++i) {
    2133      127573 :     MVT VT = N->getSimpleValueType(i);
    2134      127573 :     if (!N->hasAnyUseOfValue(i))
    2135           6 :       continue;
    2136      127567 :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2137      382701 :     if (RegPressure[RCId] >= RegLimit[RCId])
    2138       25124 :       --PDiff;
    2139             :   }
    2140             :   return PDiff;
    2141             : }
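
A rough standalone sketch of the same arithmetic (hypothetical inputs, not the
LLVM API): UseClasses holds the register class of each not-yet-live def this
node consumes, DefClasses the class of each def it produces.

    #include <vector>

    int miniRegPressureDiff(const std::vector<unsigned> &UseClasses,
                            const std::vector<unsigned> &DefClasses,
                            const std::vector<unsigned> &RegPressure,
                            const std::vector<unsigned> &RegLimit) {
      int PDiff = 0;
      for (unsigned RC : UseClasses)   // a scheduled use makes its operand live
        if (RegPressure[RC] >= RegLimit[RC]) ++PDiff;
      for (unsigned RC : DefClasses)   // scheduling the node retires its defs
        if (RegPressure[RC] >= RegLimit[RC]) --PDiff;
      return PDiff;  // only classes already at/over their limit count
    }

In ilp_ls_rr_sort below, the node with the smaller diff wins the comparison:
scheduled bottom-up, it retires more high-pressure live ranges than it creates.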
    2142             : 
    2143     4021772 : void RegReductionPQBase::scheduledNode(SUnit *SU) {
    2144     4021772 :   if (!TracksRegPressure)
    2145             :     return;
    2146             : 
    2147      222818 :   if (!SU->getNode())
    2148             :     return;
    2149             : 
    2150      709420 :   for (const SDep &Pred : SU->Preds) {
    2151      243302 :     if (Pred.isCtrl())
    2152       70937 :       continue;
    2153             :     SUnit *PredSU = Pred.getSUnit();
    2154             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2155             :     // to cover the number of registers defined (they are all live).
    2156      172365 :     if (PredSU->NumRegDefsLeft == 0) {
    2157       25638 :       continue;
    2158             :     }
    2159             :     // FIXME: The ScheduleDAG currently loses information about which of a
    2160             :     // node's values is consumed by each dependence. Consequently, if the node
    2161             :     // defines multiple register classes, we don't know which to pressurize
    2162             :     // here. Instead the following loop consumes the register defs in an
    2163             :     // arbitrary order. At least it handles the common case of clustered loads
    2164             :     // to the same class. For precise liveness, each SDep needs to indicate the
    2165             :     // result number. But that tightly couples the ScheduleDAG with the
    2166             :     // SelectionDAG, making updates tricky. A simpler hack would be to attach a
    2167             :     // value type or register class to SDep.
    2168             :     //
    2169             :     // The most important aspect of register tracking is balancing the increase
    2170             :     // here with the reduction further below. Note that this SU may use multiple
    2171             :     // defs in PredSU. They can't be determined here, but we've already
    2172             :     // compensated by reducing NumRegDefsLeft in PredSU during
    2173             :     // ScheduleDAGSDNodes::AddSchedEdges.
    2174      146727 :     --PredSU->NumRegDefsLeft;
    2175      146727 :     unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    2176      147314 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2177      147901 :          RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2178      147314 :       if (SkipRegDefs)
    2179         587 :         continue;
    2180             : 
    2181             :       unsigned RCId, Cost;
    2182      146727 :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2183      293454 :       RegPressure[RCId] += Cost;
    2184      146727 :       break;
    2185             :     }
    2186             :   }
    2187             : 
    2188             :   // We should have this assert, but there may be dead SDNodes that never
    2189             :   // materialize as SUnits, so they don't appear to generate liveness.
    2190             :   //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
    2191      222816 :   int SkipRegDefs = (int)SU->NumRegDefsLeft;
    2192      373280 :   for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
    2193      523744 :        RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2194      150464 :     if (SkipRegDefs > 0)
    2195          11 :       continue;
    2196             :     unsigned RCId, Cost;
    2197      150453 :     GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2198      300906 :     if (RegPressure[RCId] < Cost) {
    2199             :       // Register pressure tracking is imprecise. This can happen. But we try
    2200             :       // hard not to let it happen because it likely results in poor scheduling.
    2201             :       LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum
    2202             :                         << ") has too many regdefs\n");
    2203        3738 :       RegPressure[RCId] = 0;
    2204             :     }
    2205             :     else {
    2206      146715 :       RegPressure[RCId] -= Cost;
    2207             :     }
    2208             :   }
    2209             :   LLVM_DEBUG(dumpRegPressure());
    2210             : }
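
The bookkeeping above can be condensed into a hypothetical one-class trace
(made-up values, bottom-up order):

    // schedule store (reads %v): %v becomes live     -> RegPressure[GPR] = 1
    // schedule other work that neither uses nor defines %v (pressure holds)
    // schedule load (defines %v): %v's range retires -> RegPressure[GPR] = 0
    //
    // Increments happen once per data pred with register defs outstanding;
    // decrements are clamped at zero because the tracking is imprecise.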
    2211             : 
    2212        8504 : void RegReductionPQBase::unscheduledNode(SUnit *SU) {
    2213        8504 :   if (!TracksRegPressure)
    2214             :     return;
    2215             : 
    2216          70 :   const SDNode *N = SU->getNode();
    2217          70 :   if (!N) return;
    2218             : 
    2219          70 :   if (!N->isMachineOpcode()) {
    2220          11 :     if (N->getOpcode() != ISD::CopyToReg)
    2221             :       return;
    2222             :   } else {
    2223             :     unsigned Opc = N->getMachineOpcode();
    2224         118 :     if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2225          59 :         Opc == TargetOpcode::INSERT_SUBREG ||
    2226          59 :         Opc == TargetOpcode::SUBREG_TO_REG ||
    2227          98 :         Opc == TargetOpcode::REG_SEQUENCE ||
    2228          49 :         Opc == TargetOpcode::IMPLICIT_DEF)
    2229             :       return;
    2230             :   }
    2231             : 
    2232         323 :   for (const SDep &Pred : SU->Preds) {
    2233         137 :     if (Pred.isCtrl())
    2234          11 :       continue;
    2235             :     SUnit *PredSU = Pred.getSUnit();
    2236             :     // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    2237             :     // counts data deps.
    2238         252 :     if (PredSU->NumSuccsLeft != PredSU->Succs.size())
    2239          58 :       continue;
    2240          68 :     const SDNode *PN = PredSU->getNode();
    2241          68 :     if (!PN->isMachineOpcode()) {
    2242           8 :       if (PN->getOpcode() == ISD::CopyFromReg) {
    2243           8 :         MVT VT = PN->getSimpleValueType(0);
    2244           8 :         unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2245          16 :         RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2246             :       }
    2247           8 :       continue;
    2248             :     }
    2249             :     unsigned POpc = PN->getMachineOpcode();
    2250          60 :     if (POpc == TargetOpcode::IMPLICIT_DEF)
    2251           0 :       continue;
    2252         120 :     if (POpc == TargetOpcode::EXTRACT_SUBREG ||
    2253          60 :         POpc == TargetOpcode::INSERT_SUBREG ||
    2254          60 :         POpc == TargetOpcode::SUBREG_TO_REG) {
    2255           2 :       MVT VT = PN->getSimpleValueType(0);
    2256           2 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2257           4 :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2258           2 :       continue;
    2259             :     }
    2260         116 :     unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    2261         146 :     for (unsigned i = 0; i != NumDefs; ++i) {
    2262          44 :       MVT VT = PN->getSimpleValueType(i);
    2263          44 :       if (!PN->hasAnyUseOfValue(i))
    2264           0 :         continue;
    2265          44 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2266          88 :       if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
    2267             :         // Register pressure tracking is imprecise. This can happen.
    2268          38 :         RegPressure[RCId] = 0;
    2269             :       else
    2270          50 :         RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    2271             :     }
    2272             :   }
    2273             : 
    2274             :   // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    2275             :   // may transfer data dependencies to CopyToReg.
    2276          49 :   if (SU->NumSuccs && N->isMachineOpcode()) {
    2277         123 :     unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2278         104 :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2279             :       MVT VT = N->getSimpleValueType(i);
    2280          11 :       if (VT == MVT::Glue || VT == MVT::Other)
    2281           0 :         continue;
    2282          19 :       if (!N->hasAnyUseOfValue(i))
    2283           8 :         continue;
    2284           3 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2285           6 :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2286             :     }
    2287             :   }
    2288             : 
    2289             :   LLVM_DEBUG(dumpRegPressure());
    2290             : }
    2291             : 
    2292             : //===----------------------------------------------------------------------===//
    2293             : //           Dynamic Node Priority for Register Pressure Reduction
    2294             : //===----------------------------------------------------------------------===//
    2295             : 
    2296             : /// closestSucc - Returns the scheduled cycle of the successor which is
    2297             : /// closest to the current cycle.
    2298    13948934 : static unsigned closestSucc(const SUnit *SU) {
    2299             :   unsigned MaxHeight = 0;
    2300    48347016 :   for (const SDep &Succ : SU->Succs) {
    2301    17199041 :     if (Succ.isCtrl()) continue;  // ignore chain succs
    2302             :     unsigned Height = Succ.getSUnit()->getHeight();
    2303             :     // If there are a bunch of CopyToRegs stacked up, they should be considered
    2304             :     // to be at the same position.
    2305    26374072 :     if (Succ.getSUnit()->getNode() &&
    2306    13187036 :         Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
    2307      254264 :       Height = closestSucc(Succ.getSUnit())+1;
    2308    13187036 :     if (Height > MaxHeight)
    2309             :       MaxHeight = Height;
    2310             :   }
    2311    13948934 :   return MaxHeight;
    2312             : }
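
A toy version of the recursion (hypothetical MiniSU type, data successors
only) shows how a stack of CopyToRegs collapses to roughly one position
instead of inflating the distance:

    #include <algorithm>
    #include <vector>

    struct MiniSU {
      unsigned Height = 0;
      bool IsCopyToReg = false;
      std::vector<const MiniSU *> Succs;
    };

    unsigned miniClosestSucc(const MiniSU &SU) {
      unsigned MaxHeight = 0;
      for (const MiniSU *Succ : SU.Succs) {
        // A CopyToReg successor contributes its own closest successor's
        // height plus one, so chained copies count as a single position.
        unsigned H = Succ->IsCopyToReg ? miniClosestSucc(*Succ) + 1
                                       : Succ->Height;
        MaxHeight = std::max(MaxHeight, H);
      }
      return MaxHeight;
    }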
    2313             : 
    2314             : /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
    2315             : /// for scratch registers, i.e. the number of data dependencies.
    2316             : static unsigned calcMaxScratches(const SUnit *SU) {
    2317             :   unsigned Scratches = 0;
    2318    19390932 :   for (const SDep &Pred : SU->Preds) {
    2319     7501446 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2320     5693693 :     Scratches++;
    2321             :   }
    2322             :   return Scratches;
    2323             : }
    2324             : 
    2325             : /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
    2326             : /// CopyFromReg from a virtual register.
    2327       62049 : static bool hasOnlyLiveInOpers(const SUnit *SU) {
    2328             :   bool RetVal = false;
    2329      138919 :   for (const SDep &Pred : SU->Preds) {
    2330       67389 :     if (Pred.isCtrl()) continue;
    2331             :     const SUnit *PredSU = Pred.getSUnit();
    2332       84524 :     if (PredSU->getNode() &&
    2333       42262 :         PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
    2334             :       unsigned Reg =
    2335       30076 :         cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
    2336       15038 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2337             :         RetVal = true;
    2338       13308 :         continue;
    2339             :       }
    2340             :     }
    2341             :     return false;
    2342             :   }
    2343             :   return RetVal;
    2344             : }
    2345             : 
    2346             : /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
    2347             : /// CopyToReg to a virtual register. This SU def is probably a liveout and
    2348             : /// it has no other use. It should be scheduled closer to the terminator.
    2349        7372 : static bool hasOnlyLiveOutUses(const SUnit *SU) {
    2350             :   bool RetVal = false;
    2351       14010 :   for (const SDep &Succ : SU->Succs) {
    2352        8906 :     if (Succ.isCtrl()) continue;
    2353             :     const SUnit *SuccSU = Succ.getSUnit();
    2354        7101 :     if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
    2355             :       unsigned Reg =
    2356        3038 :         cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
    2357        1519 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2358             :         RetVal = true;
    2359        1514 :         continue;
    2360             :       }
    2361             :     }
    2362             :     return false;
    2363             :   }
    2364             :   return RetVal;
    2365             : }
    2366             : 
    2367             : // Set isVRegCycle for a node with only live in opers and live out uses. Also
    2368             : // set isVRegCycle for its CopyFromReg operands.
    2369             : //
    2370             : // This is only relevant for single-block loops, in which case the VRegCycle
    2371             : // node is likely an induction variable whose operand and target virtual
    2372             : // registers should be coalesced (e.g. pre/post increment values). Setting the
    2373             : // isVRegCycle flag helps the scheduler prioritize other uses of the same
    2374             : // CopyFromReg so that this node becomes the virtual register "kill". This
    2375             : // avoids interference between the values live in and out of the block and
    2376             : // eliminates a copy inside the loop.
    2377       62049 : static void initVRegCycle(SUnit *SU) {
    2378       62049 :   if (DisableSchedVRegCycle)
    2379             :     return;
    2380             : 
    2381       62049 :   if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    2382             :     return;
    2383             : 
    2384             :   LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
    2385             : 
    2386        1047 :   SU->isVRegCycle = true;
    2387             : 
    2388        3597 :   for (const SDep &Pred : SU->Preds) {
    2389        1275 :     if (Pred.isCtrl()) continue;
    2390        1250 :     Pred.getSUnit()->isVRegCycle = true;
    2391             :   }
    2392             : }
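
As a concrete (hypothetical) illustration of the comment above:

    // for (...) { use(i); ...; i = i + 1; }   // single block; i live in/out
    //
    // The node computing "i + 1" reads i through CopyFromReg (live-in) and
    // feeds CopyToReg (live-out), so it is marked isVRegCycle. Other uses of
    // the same CopyFromReg are then prioritized ahead of it, making "i + 1"
    // the kill of i, which lets the live-in and live-out copies of i coalesce
    // across the back edge instead of requiring a copy inside the loop.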
    2393             : 
    2394             : // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
    2395             : // CopyFromReg operands. We should no longer penalize other uses of this VReg.
    2396     4021772 : static void resetVRegCycle(SUnit *SU) {
    2397     4021772 :   if (!SU->isVRegCycle)
    2398             :     return;
    2399             : 
    2400        3597 :   for (const SDep &Pred : SU->Preds) {
    2401        1275 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2402             :     SUnit *PredSU = Pred.getSUnit();
    2403        1250 :     if (PredSU->isVRegCycle) {
    2404             :       assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
    2405             :              "VRegCycle def must be CopyFromReg");
    2406        1198 :       Pred.getSUnit()->isVRegCycle = false;
    2407             :     }
    2408             :   }
    2409             : }
    2410             : 
    2411             : // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
    2412             : // means a node that defines the VRegCycle has not been scheduled yet.
    2413     4516432 : static bool hasVRegCycleUse(const SUnit *SU) {
    2414             :   // If this SU also defines the VReg, don't hoist it as a "use".
    2415     4516432 :   if (SU->isVRegCycle)
    2416             :     return false;
    2417             : 
    2418    19126285 :   for (const SDep &Pred : SU->Preds) {
    2419     7305366 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2420     5603412 :     if (Pred.getSUnit()->isVRegCycle &&
    2421        1131 :         Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
    2422             :       LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
    2423             :       return true;
    2424             :     }
    2425             :   }
    2426             :   return false;
    2427             : }
    2428             : 
    2429             : // Check for either a dependence (latency) or resource (hazard) stall.
    2430             : //
    2431             : // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
    2432             : static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
    2433     4312359 :   if ((int)SPQ->getCurCycle() < Height) return true;
    2434     8537320 :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2435             :       != ScheduleHazardRecognizer::NoHazard)
    2436             :     return true;
    2437             :   return false;
    2438             : }
    2439             : 
    2440             : // Return -1 if left has higher priority, 1 if right has higher priority.
    2441             : // Return 0 if latency-based priority is equivalent.
    2442     2258216 : static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
    2443             :                             RegReductionPQBase *SPQ) {
    2444             :   // Scheduling an instruction that uses a VReg whose postincrement has not yet
    2445             :   // been scheduled will induce a copy. Model this as an extra cycle of latency.
    2446     2258216 :   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
    2447     2258216 :   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
    2448     2258216 :   int LHeight = (int)left->getHeight() + LPenalty;
    2449     2258216 :   int RHeight = (int)right->getHeight() + RPenalty;
    2450             : 
    2451     2258216 :   bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    2452             :     BUHasStall(left, LHeight, SPQ);
    2453     2258216 :   bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    2454             :     BUHasStall(right, RHeight, SPQ);
    2455             : 
    2456             :   // If scheduling one of the nodes will cause a pipeline stall, delay it.
    2457             :   // If scheduling both of the nodes will cause pipeline stalls, sort them
    2458             :   // according to their height.
    2459     2258216 :   if (LStall) {
    2460       43720 :     if (!RStall)
    2461             :       return 1;
    2462       37292 :     if (LHeight != RHeight)
    2463        5811 :       return LHeight > RHeight ? 1 : -1;
    2464     2214496 :   } else if (RStall)
    2465             :     return -1;
    2466             : 
    2467             :   // If either node is scheduling for latency, sort them by height/depth
    2468             :   // and latency.
    2469     2334406 :   if (!checkPref || (left->SchedulingPref == Sched::ILP ||
    2470       96900 :                      right->SchedulingPref == Sched::ILP)) {
    2471             :     // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
    2472             :     // is enabled, grouping instructions by cycle, then its height is already
    2473             :     // covered so only its depth matters. We also reach this point if both stall
    2474             :     // but have the same height.
    2475     4284288 :     if (!SPQ->getHazardRec()->isEnabled()) {
    2476     2037140 :       if (LHeight != RHeight)
    2477      140856 :         return LHeight > RHeight ? 1 : -1;
    2478             :     }
    2479     2001288 :     int LDepth = left->getDepth() - LPenalty;
    2480     2001288 :     int RDepth = right->getDepth() - RPenalty;
    2481     2001288 :     if (LDepth != RDepth) {
    2482             :       LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
    2483             :                         << ") depth " << LDepth << " vs SU (" << right->NodeNum
    2484             :                         << ") depth " << RDepth << "\n");
    2485      221872 :       return LDepth < RDepth ? 1 : -1;
    2486             :     }
    2487     1779416 :     if (left->Latency != right->Latency)
    2488        2433 :       return left->Latency > right->Latency ? 1 : -1;
    2489             :   }
    2490             :   return 0;
    2491             : }
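
A worked reading with made-up numbers, assuming neither node stalls and the
hazard recognizer is disabled:

    // left: height 5, depth 2          right: height 3, depth 2
    //
    // The heights differ, so the function returns
    //   LHeight > RHeight ? 1 : -1  ==  1
    // and right gets the higher bottom-up priority. Had the heights matched,
    // depth (adjusted by the VRegCycle penalty) and then Latency would break
    // the tie; a return of 0 hands the decision to the caller's next heuristic.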
    2492             : 
    2493     7501904 : static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
    2494             :   // Schedule physical register definitions close to their use. This is
    2495             :   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
    2496             :   // long as shortening physreg live ranges is generally good, we can defer
    2497             :   // creating a subtarget hook.
    2498     7501904 :   if (!DisableSchedPhysRegJoin) {
    2499     7501904 :     bool LHasPhysReg = left->hasPhysRegDefs;
    2500     7501904 :     bool RHasPhysReg = right->hasPhysRegDefs;
    2501     7501904 :     if (LHasPhysReg != RHasPhysReg) {
    2502             :       #ifndef NDEBUG
    2503             :       static const char *const PhysRegMsg[] = { " has no physreg",
    2504             :                                                 " defines a physreg" };
    2505             :       #endif
    2506             :       LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
    2507             :                         << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
    2508             :                         << ") " << PhysRegMsg[RHasPhysReg] << "\n");
    2509        7961 :       return LHasPhysReg < RHasPhysReg;
    2510             :     }
    2511             :   }
    2512             : 
    2513             :   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
    2514     7493943 :   unsigned LPriority = SPQ->getNodePriority(left);
    2515     7493943 :   unsigned RPriority = SPQ->getNodePriority(right);
    2516             : 
    2517             :   // Be really careful about hoisting call operands above previous calls.
    2518             :   // Only allow it if it would reduce register pressure.
    2519     7493943 :   if (left->isCall && right->isCallOp) {
    2520         839 :     unsigned RNumVals = right->getNode()->getNumValues();
    2521         839 :     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
    2522             :   }
    2523     7493943 :   if (right->isCall && left->isCallOp) {
    2524         865 :     unsigned LNumVals = left->getNode()->getNumValues();
    2525         865 :     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
    2526             :   }
    2527             : 
    2528     7493943 :   if (LPriority != RPriority)
    2529      645022 :     return LPriority > RPriority;
    2530             : 
    2531             :   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
    2532             :   // the same, then keep source order.
    2533     6848921 :   if (left->isCall || right->isCall) {
    2534             :     unsigned LOrder = SPQ->getNodeOrdering(left);
    2535             :     unsigned ROrder = SPQ->getNodeOrdering(right);
    2536             : 
    2537             :     // Prefer an ordering where the lower the non-zero order number, the higher
    2538             :     // the preference.
    2539        5516 :     if ((LOrder || ROrder) && LOrder != ROrder)
    2540        1586 :       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2541             :   }
    2542             : 
    2543             :   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
    2544             :   // e.g.
    2545             :   // t1 = op t2, c1
    2546             :   // t3 = op t4, c2
    2547             :   //
    2548             :   // and the following instructions are both ready.
    2549             :   // t2 = op c3
    2550             :   // t4 = op c4
    2551             :   //
    2552             :   // Then schedule t2 = op first.
    2553             :   // i.e.
    2554             :   // t4 = op c4
    2555             :   // t2 = op c3
    2556             :   // t1 = op t2, c1
    2557             :   // t3 = op t4, c2
    2558             :   //
    2559             :   // This creates more short live intervals.
    2560     6847335 :   unsigned LDist = closestSucc(left);
    2561     6847335 :   unsigned RDist = closestSucc(right);
    2562     6847335 :   if (LDist != RDist)
    2563     4653315 :     return LDist < RDist;
    2564             : 
    2565             :   // How many registers becomes live when the node is scheduled.
    2566             :   unsigned LScratch = calcMaxScratches(left);
    2567             :   unsigned RScratch = calcMaxScratches(right);
    2568     2194020 :   if (LScratch != RScratch)
    2569       86310 :     return LScratch > RScratch;
    2570             : 
    2571             :   // Comparing latency against a call makes little sense unless the node
    2572             :   // is register pressure-neutral.
    2573     2107710 :   if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    2574         346 :     return (left->NodeQueueId > right->NodeQueueId);
    2575             : 
    2576             :   // Do not compare latencies when one or both of the nodes are calls.
    2577     2107364 :   if (!DisableSchedCycles &&
    2578     2107357 :       !(left->isCall || right->isCall)) {
    2579     2107337 :     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    2580     2107337 :     if (result != 0)
    2581      343653 :       return result > 0;
    2582             :   }
    2583             :   else {
    2584          27 :     if (left->getHeight() != right->getHeight())
    2585           0 :       return left->getHeight() > right->getHeight();
    2586             : 
    2587          27 :     if (left->getDepth() != right->getDepth())
    2588           8 :       return left->getDepth() < right->getDepth();
    2589             :   }
    2590             : 
    2591             :   assert(left->NodeQueueId && right->NodeQueueId &&
    2592             :          "NodeQueueId cannot be zero");
    2593     1763703 :   return (left->NodeQueueId > right->NodeQueueId);
    2594             : }
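
For reference, the tie-break ladder BURRSort just walked, in order:

    // 1. a physreg def wins, keeping physical register live ranges short
    // 2. the (call-adjusted) Sethi-Ullman priority; smaller number wins
    // 3. source order, when one of the nodes is a call
    // 4. distance to the closest successor, preferring short live intervals
    // 5. the scratch register estimate from calcMaxScratches
    // 6. latency via BUCompareLatency, or height/depth when calls are involved
    // 7. NodeQueueId as the final, deterministic tie-break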
    2595             : 
    2596             : // Bottom up
    2597             : bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2598             :   if (int res = checkSpecialNodes(left, right))
    2599         314 :     return res > 0;
    2600             : 
    2601       60612 :   return BURRSort(left, right, SPQ);
    2602             : }
    2603             : 
    2604             : // Source order, otherwise bottom up.
    2605    12734009 : bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2606             :   if (int res = checkSpecialNodes(left, right))
    2607       69910 :     return res > 0;
    2608             : 
    2609    12664099 :   unsigned LOrder = SPQ->getNodeOrdering(left);
    2610             :   unsigned ROrder = SPQ->getNodeOrdering(right);
    2611             : 
    2612             :   // Prefer an ordering where the lower the non-zero order number, the higher
    2613             :   // the preference.
    2614    12664099 :   if ((LOrder || ROrder) && LOrder != ROrder)
    2615     5460086 :     return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2616             : 
    2617     7204013 :   return BURRSort(left, right, SPQ);
    2618             : }
    2619             : 
    2620             : // If the time between now and when the instruction will be ready can cover
    2621             : // the spill code, then avoid adding it to the ready queue. This gives long
    2622             : // stalls highest priority and allows hoisting across calls. It should also
    2623             : // speed up processing the available queue.
    2624             : bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2625             :   static const unsigned ReadyDelay = 3;
    2626             : 
    2627             :   if (SPQ->MayReduceRegPressure(SU)) return true;
    2628             : 
    2629             :   if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
    2630             : 
    2631             :   if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
    2632             :       != ScheduleHazardRecognizer::NoHazard)
    2633             :     return false;
    2634             : 
    2635             :   return true;
    2636             : }
    2637             : 
    2638             : // Return true if right should be scheduled with higher priority than left.
    2639      171608 : bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2640             :   if (int res = checkSpecialNodes(left, right))
    2641        1305 :     return res > 0;
    2642             : 
    2643      338623 :   if (left->isCall || right->isCall)
    2644             :     // No way to compute latency of calls.
    2645        3281 :     return BURRSort(left, right, SPQ);
    2646             : 
    2647      167022 :   bool LHigh = SPQ->HighRegPressure(left);
    2648      167022 :   bool RHigh = SPQ->HighRegPressure(right);
    2649             :   // Avoid causing spills. If register pressure is high, schedule for
    2650             :   // register pressure reduction.
    2651      167022 :   if (LHigh && !RHigh) {
    2652             :     LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
    2653             :                       << right->NodeNum << ")\n");
    2654             :     return true;
    2655             :   }
    2656      165555 :   else if (!LHigh && RHigh) {
    2657             :     LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
    2658             :                       << left->NodeNum << ")\n");
    2659             :     return false;
    2660             :   }
    2661      155396 :   if (!LHigh && !RHigh) {
    2662      150879 :     int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    2663      150879 :     if (result != 0)
    2664       42218 :       return result > 0;
    2665             :   }
    2666      113178 :   return BURRSort(left, right, SPQ);
    2667             : }
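
Summarizing the hybrid ordering above:

    // Calls go straight to BURRSort (their latency cannot be computed). If
    // scheduling exactly one of the nodes would drive a register class past
    // its limit (HighRegPressure), that node is deferred to avoid spills.
    // When neither is under pressure, BUCompareLatency (with checkPref set)
    // decides, and any remaining tie falls through to BURRSort.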
    2668             : 
    2669             : // Schedule as many instructions in each cycle as possible. So don't make an
    2670             : // instruction available unless it is ready in the current cycle.
    2671             : bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2672             :   if (SU->getHeight() > CurCycle) return false;
    2673             : 
    2674             :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2675             :       != ScheduleHazardRecognizer::NoHazard)
    2676             :     return false;
    2677             : 
    2678             :   return true;
    2679             : }
    2680             : 
    2681       34534 : static bool canEnableCoalescing(SUnit *SU) {
    2682       34534 :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2683       34534 :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2684             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2685             :     // avoid spilling.
    2686             :     return true;
    2687             : 
    2688       52812 :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2689       52812 :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2690             :       Opc == TargetOpcode::INSERT_SUBREG)
    2691             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2692             :     // close to their uses to facilitate coalescing.
    2693             :     return true;
    2694             : 
    2695       26406 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2696             :     // If SU does not have a register def, schedule it close to its uses
    2697             :     // because it does not lengthen any live ranges.
    2698             :     return true;
    2699             : 
    2700       26406 :   return false;
    2701             : }
    2702             : 
    2703             : // list-ilp is currently an experimental scheduler that allows various
    2704             : // heuristics to be enabled prior to the normal register reduction logic.
    2705      151395 : bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2706             :   if (int res = checkSpecialNodes(left, right))
    2707        1870 :     return res > 0;
    2708             : 
    2709      296933 :   if (left->isCall || right->isCall)
    2710             :     // No way to compute latency of calls.
    2711        3841 :     return BURRSort(left, right, SPQ);
    2712             : 
    2713      145684 :   unsigned LLiveUses = 0, RLiveUses = 0;
    2714             :   int LPDiff = 0, RPDiff = 0;
    2715      145684 :   if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    2716      145684 :     LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    2717      145684 :     RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
    2718             :   }
    2719      145684 :   if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    2720             :     LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
    2721             :                       << "): " << LPDiff << " != SU(" << right->NodeNum
    2722             :                       << "): " << RPDiff << "\n");
    2723       17262 :     return LPDiff > RPDiff;
    2724             :   }
    2725             : 
    2726      256844 :   if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    2727       17267 :     bool LReduce = canEnableCoalescing(left);
    2728       17267 :     bool RReduce = canEnableCoalescing(right);
    2729       17267 :     if (LReduce && !RReduce) return false;
    2730       16850 :     if (RReduce && !LReduce) return true;
    2731             :   }
    2732             : 
    2733      127942 :   if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    2734             :     LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
    2735             :                       << " != SU(" << right->NodeNum << "): " << RLiveUses
    2736             :                       << "\n");
    2737           0 :     return LLiveUses < RLiveUses;
    2738             :   }
    2739             : 
    2740      127942 :   if (!DisableSchedStalls) {
    2741           0 :     bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    2742           0 :     bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    2743           0 :     if (LStall != RStall)
    2744           0 :       return left->getHeight() > right->getHeight();
    2745             :   }
    2746             : 
    2747      127942 :   if (!DisableSchedCriticalPath) {
    2748      255884 :     int spread = (int)left->getDepth() - (int)right->getDepth();
    2749      255884 :     if (std::abs(spread) > MaxReorderWindow) {
    2750             :       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
    2751             :                         << left->getDepth() << " != SU(" << right->NodeNum
    2752             :                         << "): " << right->getDepth() << "\n");
    2753        2542 :       return left->getDepth() < right->getDepth();
    2754             :     }
    2755             :   }
    2756             : 
    2757      250800 :   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    2758       93618 :     int spread = (int)left->getHeight() - (int)right->getHeight();
    2759       93618 :     if (std::abs(spread) > MaxReorderWindow)
    2760        8421 :       return left->getHeight() > right->getHeight();
    2761             :   }
    2762             : 
    2763      116979 :   return BURRSort(left, right, SPQ);
    2764             : }
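
The list-ilp heuristics above fire in this order:

    // 1. RegPressureDiff: the node that retires more high-pressure live
    //    ranges than it creates (smaller diff) wins
    // 2. when pressure is in play, prefer nodes that enable coalescing
    // 3. live-use counts and stall checks (both disabled by default here,
    //    as the zero hit counts suggest)
    // 4. depth, then height, but only when the gap exceeds MaxReorderWindow
    // 5. otherwise fall through to BURRSort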
    2765             : 
    2766      363255 : void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
    2767      363255 :   SUnits = &sunits;
    2768             :   // Add pseudo dependency edges for two-address nodes.
    2769      363255 :   if (!Disable2AddrHack)
    2770           0 :     AddPseudoTwoAddrDeps();
    2771             :   // Reroute edges to nodes with multiple uses.
    2772      363255 :   if (!TracksRegPressure && !SrcOrder)
    2773        7644 :     PrescheduleNodesWithMultipleUses();
    2774             :   // Calculate node priorities.
    2775      363255 :   CalculateSethiUllmanNumbers();
    2776             : 
    2777             :   // For single block loops, mark nodes that look like canonical IV increments.
    2778      363255 :   if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
    2779       65810 :     for (SUnit &SU : sunits)
    2780       62049 :       initVRegCycle(&SU);
    2781      363255 : }
    2782             : 
    2783             : //===----------------------------------------------------------------------===//
    2784             : //                    Preschedule for Register Pressure
    2785             : //===----------------------------------------------------------------------===//
    2786             : 
    2787           0 : bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
    2788           0 :   if (SU->isTwoAddress) {
    2789           0 :     unsigned Opc = SU->getNode()->getMachineOpcode();
    2790           0 :     const MCInstrDesc &MCID = TII->get(Opc);
    2791           0 :     unsigned NumRes = MCID.getNumDefs();
    2792           0 :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    2793           0 :     for (unsigned i = 0; i != NumOps; ++i) {
    2794           0 :       if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
    2795           0 :         SDNode *DU = SU->getNode()->getOperand(i).getNode();
    2796           0 :         if (DU->getNodeId() != -1 &&
    2797           0 :             Op->OrigNode == &(*SUnits)[DU->getNodeId()])
    2798             :           return true;
    2799             :       }
    2800             :     }
    2801             :   }
    2802             :   return false;
    2803             : }
    2804             : 
    2805             : /// canClobberReachingPhysRegUse - True if SU would clobber one of its
    2806             : /// successor's explicit physregs whose definition can reach DepSU.
    2807             : /// i.e. DepSU should not be scheduled above SU.
    2808           0 : static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
    2809             :                                          ScheduleDAGRRList *scheduleDAG,
    2810             :                                          const TargetInstrInfo *TII,
    2811             :                                          const TargetRegisterInfo *TRI) {
    2812             :   const MCPhysReg *ImpDefs
    2813           0 :     = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
    2814             :   const uint32_t *RegMask = getNodeRegMask(SU->getNode());
    2815           0 :   if (!ImpDefs && !RegMask)
    2816             :     return false;
    2817             : 
    2818           0 :   for (const SDep &Succ : SU->Succs) {
    2819             :     SUnit *SuccSU = Succ.getSUnit();
    2820           0 :     for (const SDep &SuccPred : SuccSU->Preds) {
    2821           0 :       if (!SuccPred.isAssignedRegDep())
    2822           0 :         continue;
    2823             : 
    2824           0 :       if (RegMask &&
    2825           0 :           MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
    2826             :           scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2827             :         return true;
    2828             : 
    2829           0 :       if (ImpDefs)
    2830           0 :         for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
    2831             :           // Return true if SU clobbers this physical register use and the
    2832             :           // definition of the register reaches from DepSU. IsReachable queries
    2833             :           // a topological forward sort of the DAG (following the successors).
    2834           0 :           if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
    2835             :               scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2836             :             return true;
    2837             :     }
    2838             :   }
    2839             :   return false;
    2840             : }
    2841             : 
    2842             : /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
    2843             : /// physical register defs.
    2844           4 : static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
    2845             :                                   const TargetInstrInfo *TII,
    2846             :                                   const TargetRegisterInfo *TRI) {
    2847           4 :   SDNode *N = SuccSU->getNode();
    2848          12 :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2849           4 :   const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
    2850             :   assert(ImpDefs && "Caller should check hasPhysRegDefs");
    2851           4 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
    2852             :        SUNode = SUNode->getGluedNode()) {
    2853           8 :     if (!SUNode->isMachineOpcode())
    2854           0 :       continue;
    2855             :     const MCPhysReg *SUImpDefs =
    2856          16 :       TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    2857             :     const uint32_t *SURegMask = getNodeRegMask(SUNode);
    2858           8 :     if (!SUImpDefs && !SURegMask)
    2859           0 :       continue;
    2860          24 :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2861             :       MVT VT = N->getSimpleValueType(i);
    2862           8 :       if (VT == MVT::Glue || VT == MVT::Other)
    2863           0 :         continue;
    2864           8 :       if (!N->hasAnyUseOfValue(i))
    2865           0 :         continue;
    2866           8 :       unsigned Reg = ImpDefs[i - NumDefs];
    2867          12 :       if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
    2868             :         return true;
    2869           4 :       if (!SUImpDefs)
    2870           0 :         continue;
    2871          12 :       for (;*SUImpDefs; ++SUImpDefs) {
    2872           4 :         unsigned SUReg = *SUImpDefs;
    2873           4 :         if (TRI->regsOverlap(Reg, SUReg))
    2874             :           return true;
    2875             :       }
    2876             :     }
    2877             :   }
    2878             :   return false;
    2879             : }
    2880             : 
    2881             : /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
    2882             : /// are not handled well by the general register pressure reduction
    2883             : /// heuristics. When presented with code like this:
    2884             : ///
    2885             : ///      N
    2886             : ///    / |
    2887             : ///   /  |
    2888             : ///  U  store
    2889             : ///  |
    2890             : /// ...
    2891             : ///
    2892             : /// the heuristics tend to push the store up, but since the
    2893             : /// operand of the store has another use (U), this would increase
    2894             : /// the length of that other use (the U->N edge).
    2895             : ///
    2896             : /// This function transforms code like the above to route U's
    2897             : /// dependence through the store when possible, like this:
    2898             : ///
    2899             : ///      N
    2900             : ///      ||
    2901             : ///      ||
    2902             : ///     store
    2903             : ///       |
    2904             : ///       U
    2905             : ///       |
    2906             : ///      ...
    2907             : ///
    2908             : /// This results in the store being scheduled immediately
    2909             : /// after N, which shortens the U->N live range, reducing
    2910             : /// register pressure.
    2911        7644 : void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
    2912             :   // Visit all the nodes in topological order, working top-down.
    2913       65623 :   for (SUnit &SU : *SUnits) {
    2914             :     // For now, only look at nodes with no data successors, such as stores.
    2915             :     // These are especially important, due to the heuristics in
    2916             :     // getNodePriority for nodes with no data successors.
    2917       50335 :     if (SU.NumSuccs != 0)
    2918       28467 :       continue;
    2919             :     // For now, only look at nodes with exactly one data predecessor.
    2920       21868 :     if (SU.NumPreds != 1)
    2921       14482 :       continue;
    2922             :     // Avoid prescheduling copies to virtual registers, which don't behave
    2923             :     // like other nodes from the perspective of scheduling heuristics.
    2924        7386 :     if (SDNode *N = SU.getNode())
    2925        8883 :       if (N->getOpcode() == ISD::CopyToReg &&
    2926             :           TargetRegisterInfo::isVirtualRegister
    2927        2994 :             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2928        1438 :         continue;
    2929             : 
    2930             :     // Locate the single data predecessor.
    2931             :     SUnit *PredSU = nullptr;
    2932        7736 :     for (const SDep &Pred : SU.Preds)
    2933        6842 :       if (!Pred.isCtrl()) {
    2934             :         PredSU = Pred.getSUnit();
    2935        5948 :         break;
    2936             :       }
    2937             :     assert(PredSU);
    2938             : 
    2939             :     // Don't rewrite edges that carry physregs, because that requires additional
    2940             :     // support infrastructure.
    2941        5948 :     if (PredSU->hasPhysRegDefs)
    2942          13 :       continue;
    2943             :     // Short-circuit the case where SU is PredSU's only data successor.
    2944        5935 :     if (PredSU->NumSuccs == 1)
    2945        5311 :       continue;
    2946             :     // Avoid prescheduling to copies from virtual registers, which don't behave
    2947             :     // like other nodes from the perspective of scheduling heuristics.
    2948         624 :     if (SDNode *N = SU.getNode())
    2949         631 :       if (N->getOpcode() == ISD::CopyFromReg &&
    2950             :           TargetRegisterInfo::isVirtualRegister
    2951          14 :             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2952           0 :         continue;
    2953             : 
    2954             :     // Perform checks on the successors of PredSU.
    2955        1538 :     for (const SDep &PredSucc : PredSU->Succs) {
    2956             :       SUnit *PredSuccSU = PredSucc.getSUnit();
    2957        1016 :       if (PredSuccSU == &SU) continue;
    2958             :       // If PredSU has another successor with no data successors, for
    2959             :       // now don't attempt to choose either over the other.
    2960         651 :       if (PredSuccSU->NumSuccs == 0)
    2961             :         goto outer_loop_continue;
    2962             :       // Don't break physical register dependencies.
    2963         120 :       if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
    2964           4 :         if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
    2965             :           goto outer_loop_continue;
    2966             :       // Don't introduce graph cycles.
    2967         232 :       if (scheduleDAG->IsReachable(&SU, PredSuccSU))
    2968             :         goto outer_loop_continue;
    2969             :     }
    2970             : 
    2971             :     // Ok, the transformation is safe and the heuristics suggest it is
    2972             :     // profitable. Update the graph.
    2973             :     LLVM_DEBUG(
    2974             :         dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
    2975             :                << PredSU->NodeNum
    2976             :                << " to guide scheduling in the presence of multiple uses\n");
    2977         556 :     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
    2978         142 :       SDep Edge = PredSU->Succs[i];
    2979             :       assert(!Edge.isAssignedRegDep());
    2980             :       SUnit *SuccSU = Edge.getSUnit();
    2981         142 :       if (SuccSU != &SU) {
    2982             :         Edge.setSUnit(PredSU);
    2983          77 :         scheduleDAG->RemovePred(SuccSU, Edge);
    2984          77 :         scheduleDAG->AddPred(&SU, Edge);
    2985             :         Edge.setSUnit(&SU);
    2986          77 :         scheduleDAG->AddPred(SuccSU, Edge);
    2987          77 :         --i;
    2988             :       }
    2989             :     }
    2990         624 :   outer_loop_continue:;
    2991             :   }
    2992        7644 : }
    2993             : 
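The edge rewrite above is easier to see on a plain successor-list DAG than
through the SDep API. Below is a standalone sketch of the same rerouting; the
Node type and prescheduleThrough name are hypothetical, not the LLVM API:

    #include <vector>

    struct Node {
      std::vector<Node *> Succs; // data successors only
    };

    // Reroute every successor of Pred other than Store through Store, so a
    // scheduler is forced to place Store immediately after Pred (the
    // N -> store -> U shape from the comment above).
    void prescheduleThrough(Node &Pred, Node &Store) {
      std::vector<Node *> Others;
      for (Node *Succ : Pred.Succs)
        if (Succ != &Store)
          Others.push_back(Succ);   // e.g. the other use U
      Pred.Succs.assign(1, &Store); // Pred now feeds only Store
      Store.Succs.insert(Store.Succs.end(), Others.begin(), Others.end());
    }

The real routine additionally refuses the rewrite when it would break a
physical-register dependence or introduce a cycle, as checked above.
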
    2994             : /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
    2995             : /// it as a def&use operand, add a pseudo control edge from it to the other
    2996             : /// node (if it won't create a cycle) so the two-address one will be scheduled
    2997             : /// first (lower in the schedule). If both nodes are two-address, favor the
    2998             : /// one that has a CopyToReg use (more likely to be a loop induction update).
    2999             : /// If both are two-address but only one is commutable, favor the one
    3000             : /// that's not commutable.
    3001           0 : void RegReductionPQBase::AddPseudoTwoAddrDeps() {
    3002           0 :   for (SUnit &SU : *SUnits) {
    3003           0 :     if (!SU.isTwoAddress)
    3004           0 :       continue;
    3005             : 
    3006           0 :     SDNode *Node = SU.getNode();
    3007           0 :     if (!Node || !Node->isMachineOpcode() || Node->getGluedNode())
    3008           0 :       continue;
    3009             : 
    3010           0 :     bool isLiveOut = hasOnlyLiveOutUses(&SU);
    3011             :     unsigned Opc = Node->getMachineOpcode();
    3012           0 :     const MCInstrDesc &MCID = TII->get(Opc);
    3013           0 :     unsigned NumRes = MCID.getNumDefs();
    3014           0 :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    3015           0 :     for (unsigned j = 0; j != NumOps; ++j) {
    3016           0 :       if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
    3017           0 :         continue;
    3018           0 :       SDNode *DU = Node->getOperand(j).getNode();
    3019           0 :       if (DU->getNodeId() == -1)
    3020           0 :         continue;
    3021           0 :       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
    3022           0 :       if (!DUSU)
    3023           0 :         continue;
    3024           0 :       for (const SDep &Succ : DUSU->Succs) {
    3025           0 :         if (Succ.isCtrl())
    3026           0 :           continue;
    3027             :         SUnit *SuccSU = Succ.getSUnit();
    3028           0 :         if (SuccSU == &SU)
    3029           0 :           continue;
    3030             :         // Be conservative. Ignore if nodes aren't at roughly the same
    3031             :         // depth and height.
    3032           0 :         if (SuccSU->getHeight() < SU.getHeight() &&
    3033           0 :             (SU.getHeight() - SuccSU->getHeight()) > 1)
    3034           0 :           continue;
    3035             :         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
    3036             :         // constrains whatever is using the copy, instead of the copy
    3037             :         // itself. In the case that the copy is coalesced, this
    3038             : // preserves the intent of the pseudo two-address heuristics.
    3039           0 :         while (SuccSU->Succs.size() == 1 &&
    3040           0 :                SuccSU->getNode()->isMachineOpcode() &&
    3041             :                SuccSU->getNode()->getMachineOpcode() ==
    3042             :                  TargetOpcode::COPY_TO_REGCLASS)
    3043             :           SuccSU = SuccSU->Succs.front().getSUnit();
    3044             :         // Don't constrain non-instruction nodes.
    3045           0 :         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
    3046           0 :           continue;
    3047             :         // Don't constrain nodes with physical register defs if the
    3048             :         // predecessor can clobber them.
    3049           0 :         if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
    3050           0 :           if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
    3051           0 :             continue;
    3052             :         }
    3053             :         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
    3054             :         // these may be coalesced away. We want them close to their uses.
    3055           0 :         unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
    3056           0 :         if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
    3057           0 :             SuccOpc == TargetOpcode::INSERT_SUBREG ||
    3058           0 :             SuccOpc == TargetOpcode::SUBREG_TO_REG)
    3059           0 :           continue;
    3060           0 :         if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
    3061           0 :             (!canClobber(SuccSU, DUSU) ||
    3062           0 :              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
    3063           0 :              (!SU.isCommutable && SuccSU->isCommutable)) &&
    3064           0 :             !scheduleDAG->IsReachable(SuccSU, &SU)) {
    3065             :           LLVM_DEBUG(dbgs()
    3066             :                      << "    Adding a pseudo-two-addr edge from SU #"
    3067             :                      << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
    3068           0 :           scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
    3069             :         }
    3070             :       }
    3071             :     }
    3072             :   }
    3073           0 : }
    3074             : 
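A recurring guard in the routine above is !scheduleDAG->IsReachable(SuccSU,
&SU) before AddPred: the artificial predecessor edge is only legal if it
cannot close a cycle. A standalone sketch of that guard on a hypothetical
Unit type (LLVM itself maintains an incremental topological order instead of
re-running a DFS per query):

    #include <set>
    #include <vector>

    struct Unit {
      std::vector<Unit *> Succs;
    };

    // True if a path From -> ... -> To exists; plain DFS with a visited set.
    static bool pathExists(const Unit *From, const Unit *To,
                           std::set<const Unit *> &Visited) {
      if (From == To)
        return true;
      if (!Visited.insert(From).second)
        return false;
      for (const Unit *S : From->Succs)
        if (pathExists(S, To, Visited))
          return true;
      return false;
    }

    // Add the edge Pred -> Succ only if Succ cannot already reach Pred;
    // such a path plus the new edge would form a cycle.
    static bool tryAddEdge(Unit &Pred, Unit &Succ) {
      std::set<const Unit *> Visited;
      if (pathExists(&Succ, &Pred, Visited))
        return false;
      Pred.Succs.push_back(&Succ);
      return true;
    }
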
    3075             : //===----------------------------------------------------------------------===//
    3076             : //                         Public Constructor Functions
    3077             : //===----------------------------------------------------------------------===//
    3078             : 
    3079             : ScheduleDAGSDNodes *
    3080        7644 : llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
    3081             :                                  CodeGenOpt::Level OptLevel) {
    3082        7644 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3083        7644 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3084        7644 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3085             : 
    3086             :   BURegReductionPriorityQueue *PQ =
    3087        7644 :     new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
    3088        7644 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3089             :   PQ->setScheduleDAG(SD);
    3090        7644 :   return SD;
    3091             : }
    3092             : 
    3093             : ScheduleDAGSDNodes *
    3094      326849 : llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
    3095             :                                    CodeGenOpt::Level OptLevel) {
    3096      326849 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3097      326849 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3098      326849 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3099             : 
    3100             :   SrcRegReductionPriorityQueue *PQ =
    3101      326849 :     new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
    3102      326849 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3103             :   PQ->setScheduleDAG(SD);
    3104      326849 :   return SD;
    3105             : }
    3106             : 
    3107             : ScheduleDAGSDNodes *
    3108       12952 : llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
    3109             :                                    CodeGenOpt::Level OptLevel) {
    3110       12952 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3111       12952 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3112       12952 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3113       12952 :   const TargetLowering *TLI = IS->TLI;
    3114             : 
    3115             :   HybridBURRPriorityQueue *PQ =
    3116       12952 :     new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3117             : 
    3118       12952 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3119             :   PQ->setScheduleDAG(SD);
    3120       12952 :   return SD;
    3121             : }
    3122             : 
    3123             : ScheduleDAGSDNodes *
    3124       15810 : llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
    3125             :                                 CodeGenOpt::Level OptLevel) {
    3126       15810 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3127       15810 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3128       15810 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3129       15810 :   const TargetLowering *TLI = IS->TLI;
    3130             : 
    3131             :   ILPBURRPriorityQueue *PQ =
    3132       15810 :     new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3133       15810 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3134             :   PQ->setScheduleDAG(SD);
    3135       15810 :   return SD;
    3136      299229 : }
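
A factory like the four above is typically exposed as a -pre-RA-sched choice
via the RegisterScheduler helper declared in
llvm/CodeGen/SchedulerRegistry.h. A sketch of that registration, following
the usual LLVM pattern; the flag name and description here are illustrative:

    static RegisterScheduler
      burrListDAGScheduler("list-burr",
                           "Bottom-up register reduction list scheduling",
                           createBURRListDAGScheduler);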

Generated by: LCOV version 1.13