LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - ScheduleDAGRRList.cpp (source / functions)
Test: llvm-toolchain.info    Lines: 1035 of 1160 hit (89.2 %)
Date: 2018-02-23 15:42:53    Functions: 84 of 99 hit (84.8 %)

          Line data    Source code
       1             : //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
       10             : // This implements a bottom-up register pressure reduction list scheduler,
       11             : // using standard algorithms.  The basic approach uses a priority
      12             : // queue of available nodes to schedule.  One at a time, nodes are taken from
      13             : // the priority queue (thus in priority order), checked for legality to
      14             : // schedule, and emitted if legal.
      15             : //
      16             : //===----------------------------------------------------------------------===//
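// --- Editor's sketch (illustration only, not part of this file) ------------
// A minimal, self-contained version of the list-scheduling loop described
// above, assuming a hypothetical standalone Node type. Legality checks
// (hazards, live-register interference) are elided. Bottom-up: a node becomes
// available once all of its successors are scheduled, and nodes are popped
// from the priority queue in priority order and emitted.
#include <queue>
#include <vector>

namespace sketch {
struct Node {
  int Priority = 0;                // larger = schedule sooner
  unsigned NumSuccsLeft = 0;       // successors not yet scheduled
  std::vector<Node *> Preds;       // predecessors released afterwards
};

struct ByPriority {
  bool operator()(const Node *A, const Node *B) const {
    return A->Priority < B->Priority;   // max-heap on Priority
  }
};

// Returns the emission order (bottom-up: the last instruction comes first).
inline std::vector<Node *> listScheduleBottomUp(std::vector<Node *> Roots) {
  std::priority_queue<Node *, std::vector<Node *>, ByPriority> Available(
      ByPriority(), std::move(Roots));
  std::vector<Node *> Sequence;
  while (!Available.empty()) {
    Node *SU = Available.top();    // highest-priority available node
    Available.pop();
    Sequence.push_back(SU);        // "emit" it
    for (Node *Pred : SU->Preds)   // release its predecessors
      if (--Pred->NumSuccsLeft == 0)
        Available.push(Pred);
  }
  return Sequence;
}
} // namespace sketch
// ----------------------------------------------------------------------------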
      17             : 
      18             : #include "ScheduleDAGSDNodes.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/DenseMap.h"
      21             : #include "llvm/ADT/STLExtras.h"
      22             : #include "llvm/ADT/SmallSet.h"
      23             : #include "llvm/ADT/SmallVector.h"
      24             : #include "llvm/ADT/Statistic.h"
      25             : #include "llvm/CodeGen/ISDOpcodes.h"
      26             : #include "llvm/CodeGen/MachineFunction.h"
      27             : #include "llvm/CodeGen/MachineOperand.h"
      28             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      29             : #include "llvm/CodeGen/MachineValueType.h"
      30             : #include "llvm/CodeGen/ScheduleDAG.h"
      31             : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
      32             : #include "llvm/CodeGen/SchedulerRegistry.h"
      33             : #include "llvm/CodeGen/SelectionDAGISel.h"
      34             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      35             : #include "llvm/CodeGen/TargetInstrInfo.h"
      36             : #include "llvm/CodeGen/TargetLowering.h"
      37             : #include "llvm/CodeGen/TargetOpcodes.h"
      38             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      39             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      40             : #include "llvm/IR/InlineAsm.h"
      41             : #include "llvm/MC/MCInstrDesc.h"
      42             : #include "llvm/MC/MCRegisterInfo.h"
      43             : #include "llvm/Support/Casting.h"
      44             : #include "llvm/Support/CodeGen.h"
      45             : #include "llvm/Support/CommandLine.h"
      46             : #include "llvm/Support/Compiler.h"
      47             : #include "llvm/Support/Debug.h"
      48             : #include "llvm/Support/ErrorHandling.h"
      49             : #include "llvm/Support/raw_ostream.h"
      50             : #include <algorithm>
      51             : #include <cassert>
      52             : #include <cstdint>
      53             : #include <cstdlib>
      54             : #include <iterator>
      55             : #include <limits>
      56             : #include <memory>
      57             : #include <utility>
      58             : #include <vector>
      59             : 
      60             : using namespace llvm;
      61             : 
      62             : #define DEBUG_TYPE "pre-RA-sched"
      63             : 
      64             : STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
      65             : STATISTIC(NumUnfolds,    "Number of nodes unfolded");
      66             : STATISTIC(NumDups,       "Number of duplicated nodes");
      67             : STATISTIC(NumPRCopies,   "Number of physical register copies");
      68             : 
      69             : static RegisterScheduler
      70       81686 :   burrListDAGScheduler("list-burr",
      71             :                        "Bottom-up register reduction list scheduling",
      72             :                        createBURRListDAGScheduler);
      73             : 
      74             : static RegisterScheduler
      75       81686 :   sourceListDAGScheduler("source",
      76             :                          "Similar to list-burr but schedules in source "
      77             :                          "order when possible",
      78             :                          createSourceListDAGScheduler);
      79             : 
      80             : static RegisterScheduler
      81       81686 :   hybridListDAGScheduler("list-hybrid",
      82             :                          "Bottom-up register pressure aware list scheduling "
      83             :                          "which tries to balance latency and register pressure",
      84             :                          createHybridListDAGScheduler);
      85             : 
      86             : static RegisterScheduler
      87       81686 :   ILPListDAGScheduler("list-ilp",
      88             :                       "Bottom-up register pressure aware list scheduling "
      89             :                       "which tries to balance ILP and register pressure",
      90             :                       createILPListDAGScheduler);
      91             : 
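// --- Editor's sketch (illustration only, not part of this file) ------------
// The static RegisterScheduler objects above add each scheduler to a global
// registry keyed by name; the registered name is what -pre-RA-sched=<name>
// selects on the llc command line. A rough sketch of such a name-to-factory
// registry, with hypothetical standalone types:
#include <functional>
#include <map>
#include <memory>
#include <string>

namespace sketch {
struct Scheduler { virtual ~Scheduler() = default; };
using SchedulerFactory = std::function<std::unique_ptr<Scheduler>()>;

inline std::map<std::string, SchedulerFactory> &schedulerRegistry() {
  static std::map<std::string, SchedulerFactory> Registry;
  return Registry;
}

// A static instance of this type registers its factory at program startup,
// which is roughly what the RegisterScheduler constructor does.
struct RegisterSchedulerSketch {
  RegisterSchedulerSketch(std::string Name, SchedulerFactory F) {
    schedulerRegistry().emplace(std::move(Name), std::move(F));
  }
};
} // namespace sketch
// ----------------------------------------------------------------------------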
      92       81686 : static cl::opt<bool> DisableSchedCycles(
      93      163372 :   "disable-sched-cycles", cl::Hidden, cl::init(false),
      94      163372 :   cl::desc("Disable cycle-level precision during preRA scheduling"));
      95             : 
      96             : // Temporary sched=list-ilp flags until the heuristics are robust.
      97             : // Some options are also available under sched=list-hybrid.
      98       81686 : static cl::opt<bool> DisableSchedRegPressure(
      99      163372 :   "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
     100      163372 :   cl::desc("Disable regpressure priority in sched=list-ilp"));
     101       81686 : static cl::opt<bool> DisableSchedLiveUses(
     102      163372 :   "disable-sched-live-uses", cl::Hidden, cl::init(true),
     103      163372 :   cl::desc("Disable live use priority in sched=list-ilp"));
     104       81686 : static cl::opt<bool> DisableSchedVRegCycle(
     105      163372 :   "disable-sched-vrcycle", cl::Hidden, cl::init(false),
     106      163372 :   cl::desc("Disable virtual register cycle interference checks"));
     107       81686 : static cl::opt<bool> DisableSchedPhysRegJoin(
     108      163372 :   "disable-sched-physreg-join", cl::Hidden, cl::init(false),
     109      163372 :   cl::desc("Disable physreg def-use affinity"));
     110       81686 : static cl::opt<bool> DisableSchedStalls(
     111      163372 :   "disable-sched-stalls", cl::Hidden, cl::init(true),
     112      163372 :   cl::desc("Disable no-stall priority in sched=list-ilp"));
     113       81686 : static cl::opt<bool> DisableSchedCriticalPath(
     114      163372 :   "disable-sched-critical-path", cl::Hidden, cl::init(false),
     115      163372 :   cl::desc("Disable critical path priority in sched=list-ilp"));
     116       81686 : static cl::opt<bool> DisableSchedHeight(
     117      163372 :   "disable-sched-height", cl::Hidden, cl::init(false),
     118      163372 :   cl::desc("Disable scheduled-height priority in sched=list-ilp"));
     119       81686 : static cl::opt<bool> Disable2AddrHack(
     120      163372 :   "disable-2addr-hack", cl::Hidden, cl::init(true),
     121      163372 :   cl::desc("Disable scheduler's two-address hack"));
     122             : 
     123       81686 : static cl::opt<int> MaxReorderWindow(
     124      163372 :   "max-sched-reorder", cl::Hidden, cl::init(6),
     125       81686 :   cl::desc("Number of instructions to allow ahead of the critical path "
     126       81686 :            "in sched=list-ilp"));
     127             : 
     128       81686 : static cl::opt<unsigned> AvgIPC(
     129      163372 :   "sched-avg-ipc", cl::Hidden, cl::init(1),
      130      163372 :   cl::desc("Average inst/cycle when no target itinerary exists."));
     131             : 
     132             : namespace {
     133             : 
     134             : //===----------------------------------------------------------------------===//
     135             : /// ScheduleDAGRRList - The actual register reduction list scheduler
      136             : /// implementation.  This supports bottom-up scheduling.
     137             : ///
     138             : class ScheduleDAGRRList : public ScheduleDAGSDNodes {
     139             : private:
     140             :   /// NeedLatency - True if the scheduler will make use of latency information.
     141             :   bool NeedLatency;
     142             : 
     143             :   /// AvailableQueue - The priority queue to use for the available SUnits.
     144             :   SchedulingPriorityQueue *AvailableQueue;
     145             : 
     146             :   /// PendingQueue - This contains all of the instructions whose operands have
     147             :   /// been issued, but their results are not ready yet (due to the latency of
      148             :   /// the operation).  Once the operands become available, the instruction is
     149             :   /// added to the AvailableQueue.
     150             :   std::vector<SUnit *> PendingQueue;
     151             : 
     152             :   /// HazardRec - The hazard recognizer to use.
     153             :   ScheduleHazardRecognizer *HazardRec;
     154             : 
     155             :   /// CurCycle - The current scheduler state corresponds to this cycle.
     156             :   unsigned CurCycle = 0;
     157             : 
     158             :   /// MinAvailableCycle - Cycle of the soonest available instruction.
     159             :   unsigned MinAvailableCycle;
     160             : 
      161             :   /// IssueCount - Count of instructions issued in this cycle.
     162             :   /// Currently valid only for bottom-up scheduling.
     163             :   unsigned IssueCount;
     164             : 
      165             :   /// LiveRegDefs - A set of physical registers and their definitions
      166             :   /// that are "live". These nodes must be scheduled before any other nodes that
      167             :   /// modify the registers can be scheduled.
     168             :   unsigned NumLiveRegs;
     169             :   std::unique_ptr<SUnit*[]> LiveRegDefs;
     170             :   std::unique_ptr<SUnit*[]> LiveRegGens;
     171             : 
     172             :   // Collect interferences between physical register use/defs.
     173             :   // Each interference is an SUnit and set of physical registers.
     174             :   SmallVector<SUnit*, 4> Interferences;
     175             : 
     176             :   using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
     177             : 
     178             :   LRegsMapT LRegsMap;
     179             : 
     180             :   /// Topo - A topological ordering for SUnits which permits fast IsReachable
     181             :   /// and similar queries.
     182             :   ScheduleDAGTopologicalSort Topo;
     183             : 
     184             :   // Hack to keep track of the inverse of FindCallSeqStart without more crazy
     185             :   // DAG crawling.
     186             :   DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
     187             : 
     188             : public:
     189      310148 :   ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
     190             :                     SchedulingPriorityQueue *availqueue,
     191             :                     CodeGenOpt::Level OptLevel)
     192      310148 :     : ScheduleDAGSDNodes(mf),
     193             :       NeedLatency(needlatency), AvailableQueue(availqueue),
     194      930444 :       Topo(SUnits, nullptr) {
     195      310147 :     const TargetSubtargetInfo &STI = mf.getSubtarget();
     196      310147 :     if (DisableSchedCycles || !NeedLatency)
     197      564389 :       HazardRec = new ScheduleHazardRecognizer();
     198             :     else
     199       27953 :       HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
     200      310148 :   }
     201             : 
     202     1240592 :   ~ScheduleDAGRRList() override {
     203      310148 :     delete HazardRec;
     204      310148 :     delete AvailableQueue;
     205      620296 :   }
     206             : 
     207             :   void Schedule() override;
     208             : 
     209             :   ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
     210             : 
     211             :   /// IsReachable - Checks if SU is reachable from TargetSU.
     212             :   bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
     213         102 :     return Topo.IsReachable(SU, TargetSU);
     214             :   }
     215             : 
     216             :   /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
     217             :   /// create a cycle.
     218             :   bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
     219       23806 :     return Topo.WillCreateCycle(SU, TargetSU);
     220             :   }
     221             : 
     222             :   /// AddPred - adds a predecessor edge to SUnit SU.
     223             :   /// This returns true if this is a new predecessor.
     224             :   /// Updates the topological ordering if required.
     225       97646 :   void AddPred(SUnit *SU, const SDep &D) {
     226      195292 :     Topo.AddPred(SU, D.getSUnit());
     227       97646 :     SU->addPred(D);
     228       97646 :   }
     229             : 
     230             :   /// RemovePred - removes a predecessor edge from SUnit SU.
     231             :   /// This returns true if an edge was removed.
     232             :   /// Updates the topological ordering if required.
     233       46629 :   void RemovePred(SUnit *SU, const SDep &D) {
     234       93258 :     Topo.RemovePred(SU, D.getSUnit());
     235       46629 :     SU->removePred(D);
     236       46629 :   }
     237             : 
     238             : private:
     239             :   bool isReady(SUnit *SU) {
     240     3439464 :     return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
     241           0 :       AvailableQueue->isReady(SU);
     242             :   }
     243             : 
     244             :   void ReleasePred(SUnit *SU, const SDep *PredEdge);
     245             :   void ReleasePredecessors(SUnit *SU);
     246             :   void ReleasePending();
     247             :   void AdvanceToCycle(unsigned NextCycle);
     248             :   void AdvancePastStalls(SUnit *SU);
     249             :   void EmitNode(SUnit *SU);
     250             :   void ScheduleNodeBottomUp(SUnit*);
     251             :   void CapturePred(SDep *PredEdge);
     252             :   void UnscheduleNodeBottomUp(SUnit*);
     253             :   void RestoreHazardCheckerBottomUp();
     254             :   void BacktrackBottomUp(SUnit*, SUnit*);
     255             :   SUnit *TryUnfoldSU(SUnit *);
     256             :   SUnit *CopyAndMoveSuccessors(SUnit*);
     257             :   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
     258             :                                 const TargetRegisterClass*,
     259             :                                 const TargetRegisterClass*,
     260             :                                 SmallVectorImpl<SUnit*>&);
     261             :   bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
     262             : 
     263             :   void releaseInterferences(unsigned Reg = 0);
     264             : 
     265             :   SUnit *PickNodeToScheduleBottomUp();
     266             :   void ListScheduleBottomUp();
     267             : 
     268             :   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
     269             :   /// Updates the topological ordering if required.
     270       12910 :   SUnit *CreateNewSUnit(SDNode *N) {
     271       25820 :     unsigned NumSUnits = SUnits.size();
     272       12910 :     SUnit *NewNode = newSUnit(N);
     273             :     // Update the topological ordering.
     274       12910 :     if (NewNode->NodeNum >= NumSUnits)
     275       12910 :       Topo.InitDAGTopologicalSorting();
     276       12910 :     return NewNode;
     277             :   }
     278             : 
     279             :   /// CreateClone - Creates a new SUnit from an existing one.
     280             :   /// Updates the topological ordering if required.
     281       19628 :   SUnit *CreateClone(SUnit *N) {
     282       39256 :     unsigned NumSUnits = SUnits.size();
     283       19628 :     SUnit *NewNode = Clone(N);
     284             :     // Update the topological ordering.
     285       19628 :     if (NewNode->NodeNum >= NumSUnits)
     286       19628 :       Topo.InitDAGTopologicalSorting();
     287       19628 :     return NewNode;
     288             :   }
     289             : 
     290             :   /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
     291             :   /// need actual latency information but the hybrid scheduler does.
     292     3997829 :   bool forceUnitLatencies() const override {
     293     8678225 :     return !NeedLatency;
     294             :   }
     295             : };
     296             : 
     297             : }  // end anonymous namespace
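// --- Editor's sketch (illustration only, not part of this file) ------------
// The Topo member above exists to answer IsReachable/WillCreateCycle queries:
// adding an edge U -> V creates a cycle exactly when V can already reach U.
// A straightforward DFS version over a standalone adjacency list (the real
// ScheduleDAGTopologicalSort maintains an incremental topological order so
// each query is much cheaper than a full traversal):
#include <vector>

namespace sketch {
// Succs[N] lists the successors of node N.
using Graph = std::vector<std::vector<unsigned>>;

inline bool isReachable(const Graph &Succs, unsigned From, unsigned To) {
  std::vector<bool> Seen(Succs.size(), false);
  std::vector<unsigned> Worklist = {From};
  while (!Worklist.empty()) {
    unsigned N = Worklist.back();
    Worklist.pop_back();
    if (N == To)
      return true;
    if (Seen[N])
      continue;
    Seen[N] = true;
    for (unsigned S : Succs[N])
      Worklist.push_back(S);
  }
  return false;
}

// Adding the edge U -> V is safe unless V already reaches U.
inline bool wouldCreateCycle(const Graph &Succs, unsigned U, unsigned V) {
  return isReachable(Succs, V, U);
}
} // namespace sketch
// ----------------------------------------------------------------------------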
     298             : 
     299             : /// GetCostForDef - Looks up the register class and cost for a given definition.
     300             : /// Typically this just means looking up the representative register class,
     301             : /// but for untyped values (MVT::Untyped) it means inspecting the node's
     302             : /// opcode to determine what register class is being generated.
     303      521311 : static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
     304             :                           const TargetLowering *TLI,
     305             :                           const TargetInstrInfo *TII,
     306             :                           const TargetRegisterInfo *TRI,
     307             :                           unsigned &RegClass, unsigned &Cost,
     308             :                           const MachineFunction &MF) {
     309             :   MVT VT = RegDefPos.GetValue();
     310             : 
     311             :   // Special handling for untyped values.  These values can only come from
     312             :   // the expansion of custom DAG-to-DAG patterns.
     313      521311 :   if (VT == MVT::Untyped) {
     314        1954 :     const SDNode *Node = RegDefPos.GetNode();
     315             : 
     316             :     // Special handling for CopyFromReg of untyped values.
     317        2090 :     if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
     318         272 :       unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     319         136 :       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
     320         272 :       RegClass = RC->getID();
     321         136 :       Cost = 1;
     322         527 :       return;
     323             :     }
     324             : 
     325             :     unsigned Opcode = Node->getMachineOpcode();
     326        1818 :     if (Opcode == TargetOpcode::REG_SEQUENCE) {
     327         510 :       unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
     328         255 :       const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
     329         510 :       RegClass = RC->getID();
     330         255 :       Cost = 1;
     331         255 :       return;
     332             :     }
     333             : 
     334        1563 :     unsigned Idx = RegDefPos.GetIdx();
     335        3126 :     const MCInstrDesc Desc = TII->get(Opcode);
     336        1563 :     const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
     337        3126 :     RegClass = RC->getID();
     338             :     // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
     339             :     // better way to determine it.
     340        1563 :     Cost = 1;
     341             :   } else {
     342     1038714 :     RegClass = TLI->getRepRegClassFor(VT)->getID();
     343      519357 :     Cost = TLI->getRepRegClassCostFor(VT);
     344             :   }
     345             : }
     346             : 
     347             : /// Schedule - Schedule the DAG using list scheduling.
     348      310148 : void ScheduleDAGRRList::Schedule() {
     349             :   DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
     350             :                << " '" << BB->getName() << "' **********\n");
     351             : 
     352      310148 :   CurCycle = 0;
     353      310148 :   IssueCount = 0;
     354      310148 :   MinAvailableCycle =
     355      310148 :       DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
     356      310148 :   NumLiveRegs = 0;
     357             :   // Allocate slots for each physical register, plus one for a special register
     358             :   // to track the virtual resource of a calling sequence.
     359      310148 :   LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
     360      310148 :   LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
     361      310148 :   CallSeqEndForStart.clear();
     362             :   assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
     363             : 
     364             :   // Build the scheduling graph.
     365      310148 :   BuildSchedGraph(nullptr);
     366             : 
     367             :   DEBUG(for (SUnit &SU : SUnits)
     368             :           SU.dumpAll(this));
     369      310148 :   Topo.InitDAGTopologicalSorting();
     370             : 
     371      310148 :   AvailableQueue->initNodes(SUnits);
     372             : 
     373      310148 :   HazardRec->Reset();
     374             : 
     375             :   // Execute the actual scheduling loop.
     376      310148 :   ListScheduleBottomUp();
     377             : 
     378      310148 :   AvailableQueue->releaseState();
     379             : 
     380             :   DEBUG({
     381             :       dbgs() << "*** Final schedule ***\n";
     382             :       dumpSchedule();
     383             :       dbgs() << '\n';
     384             :     });
     385      310148 : }
     386             : 
     387             : //===----------------------------------------------------------------------===//
     388             : //  Bottom-Up Scheduling
     389             : //===----------------------------------------------------------------------===//
     390             : 
     391             : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
     392             : /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
     393     4680396 : void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
     394     4680396 :   SUnit *PredSU = PredEdge->getSUnit();
     395             : 
     396             : #ifndef NDEBUG
     397             :   if (PredSU->NumSuccsLeft == 0) {
     398             :     dbgs() << "*** Scheduling failed! ***\n";
     399             :     PredSU->dump(this);
     400             :     dbgs() << " has been released too many times!\n";
     401             :     llvm_unreachable(nullptr);
     402             :   }
     403             : #endif
     404     4680396 :   --PredSU->NumSuccsLeft;
     405             : 
     406     4680396 :   if (!forceUnitLatencies()) {
     407             :     // Updating predecessor's height. This is now the cycle when the
     408             :     // predecessor can be scheduled without causing a pipeline stall.
     409      237148 :     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
     410             :   }
     411             : 
     412             :   // If all the node's successors are scheduled, this node is ready
     413             :   // to be scheduled. Ignore the special EntrySU node.
     414     4680396 :   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
     415     3439464 :     PredSU->isAvailable = true;
     416             : 
     417             :     unsigned Height = PredSU->getHeight();
     418     3439464 :     if (Height < MinAvailableCycle)
     419     1485657 :       MinAvailableCycle = Height;
     420             : 
     421     3439464 :     if (isReady(PredSU)) {
     422     3439464 :       AvailableQueue->push(PredSU);
     423             :     }
     424             :     // CapturePred and others may have left the node in the pending queue, avoid
     425             :     // adding it twice.
     426           0 :     else if (!PredSU->isPending) {
     427           0 :       PredSU->isPending = true;
     428           0 :       PendingQueue.push_back(PredSU);
     429             :     }
     430             :   }
     431     4680396 : }
     432             : 
     433             : /// IsChainDependent - Test if Outer is reachable from Inner through
     434             : /// chain dependencies.
     435        2221 : static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
     436             :                              unsigned NestLevel,
     437             :                              const TargetInstrInfo *TII) {
     438             :   SDNode *N = Outer;
     439             :   while (true) {
     440        4511 :     if (N == Inner)
     441             :       return true;
     442             :     // For a TokenFactor, examine each operand. There may be multiple ways
     443             :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     444             :     // most nesting in order to ensure that we find the corresponding match.
     445        9014 :     if (N->getOpcode() == ISD::TokenFactor) {
     446        1765 :       for (const SDValue &Op : N->op_values())
     447        1343 :         if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
     448             :           return true;
     449             :       return false;
     450             :     }
     451             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     452        4082 :     if (N->isMachineOpcode()) {
     453        3645 :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     454           0 :         ++NestLevel;
     455        3645 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     456        1792 :         if (NestLevel == 0)
     457             :           return false;
     458           0 :         --NestLevel;
     459             :       }
     460             :     }
     461             :     // Otherwise, find the chain and continue climbing.
     462       11535 :     for (const SDValue &Op : N->op_values())
     463       11535 :       if (Op.getValueType() == MVT::Other) {
     464        2290 :         N = Op.getNode();
     465             :         goto found_chain_operand;
     466             :       }
     467             :     return false;
     468             :   found_chain_operand:;
     469        2290 :     if (N->getOpcode() == ISD::EntryToken)
     470             :       return false;
     471             :   }
     472             : }
     473             : 
     474             : /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
     475             : /// the corresponding (lowered) CALLSEQ_BEGIN node.
     476             : ///
      477             : /// NestLevel and MaxNest are used in recursion to indicate the current level
     478             : /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
     479             : /// level seen so far.
     480             : ///
     481             : /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
     482             : /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
     483             : static SDNode *
     484      316721 : FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
     485             :                  const TargetInstrInfo *TII) {
     486             :   while (true) {
     487             :     // For a TokenFactor, examine each operand. There may be multiple ways
     488             :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     489             :     // most nesting in order to ensure that we find the corresponding match.
     490     2443948 :     if (N->getOpcode() == ISD::TokenFactor) {
     491             :       SDNode *Best = nullptr;
     492       34767 :       unsigned BestMaxNest = MaxNest;
     493      158470 :       for (const SDValue &Op : N->op_values()) {
     494      123703 :         unsigned MyNestLevel = NestLevel;
     495      123703 :         unsigned MyMaxNest = MaxNest;
     496      247406 :         if (SDNode *New = FindCallSeqStart(Op.getNode(),
     497      123703 :                                            MyNestLevel, MyMaxNest, TII))
     498      123666 :           if (!Best || (MyMaxNest > BestMaxNest)) {
     499             :             Best = New;
     500       34770 :             BestMaxNest = MyMaxNest;
     501             :           }
     502             :       }
     503             :       assert(Best);
     504       34767 :       MaxNest = BestMaxNest;
     505       34767 :       return Best;
     506             :     }
     507             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     508     1187207 :     if (N->isMachineOpcode()) {
     509      817842 :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     510      193022 :         ++NestLevel;
     511      193022 :         MaxNest = std::max(MaxNest, NestLevel);
     512      624820 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     513             :         assert(NestLevel != 0);
     514      281921 :         --NestLevel;
     515      281921 :         if (NestLevel == 0)
     516             :           return N;
     517             :       }
     518             :     }
     519             :     // Otherwise, find the chain and continue climbing.
     520     2965014 :     for (const SDValue &Op : N->op_values())
     521     2965014 :       if (Op.getValueType() == MVT::Other) {
     522      905290 :         N = Op.getNode();
     523             :         goto found_chain_operand;
     524             :       }
     525             :     return nullptr;
     526             :   found_chain_operand:;
     527      905290 :     if (N->getOpcode() == ISD::EntryToken)
     528             :       return nullptr;
     529             :   }
     530             : }
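// --- Editor's sketch (illustration only, not part of this file) ------------
// Setting aside the TokenFactor branching, FindCallSeqStart's core is the
// classic nesting-counter scan: walking back from a CALLSEQ_END, every
// further CALLSEQ_END raises the nesting level and every CALLSEQ_BEGIN lowers
// it; the BEGIN that brings the level to zero is the match. The same idea
// over a standalone token vector:
#include <cstddef>
#include <vector>

namespace sketch {
enum Tok { Begin, End, Other };

// Given the index of an End token, return the index of its matching Begin,
// or -1 if the sequence is malformed.
inline int findMatchingBegin(const std::vector<Tok> &Toks, size_t EndIdx) {
  unsigned NestLevel = 1;             // we start just inside Toks[EndIdx]
  for (size_t I = EndIdx; I-- > 0;) { // scan backwards
    if (Toks[I] == End)
      ++NestLevel;
    else if (Toks[I] == Begin && --NestLevel == 0)
      return static_cast<int>(I);
  }
  return -1;
}
} // namespace sketch
// ----------------------------------------------------------------------------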
     531             : 
     532             : /// Call ReleasePred for each predecessor, then update register live def/gen.
     533             : /// Always update LiveRegDefs for a register dependence even if the current SU
      534             : /// also defines the register. This effectively creates one large live range
      535             : /// across a sequence of two-address nodes. This is important because the
     536             : /// entire chain must be scheduled together. Example:
     537             : ///
     538             : /// flags = (3) add
     539             : /// flags = (2) addc flags
     540             : /// flags = (1) addc flags
     541             : ///
     542             : /// results in
     543             : ///
     544             : /// LiveRegDefs[flags] = 3
     545             : /// LiveRegGens[flags] = 1
     546             : ///
     547             : /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
     548             : /// interference on flags.
     549     4061639 : void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
     550             :   // Bottom up: release predecessors
     551    13422433 :   for (SDep &Pred : SU->Preds) {
     552     4680396 :     ReleasePred(SU, &Pred);
     553             :     if (Pred.isAssignedRegDep()) {
     554             :       // This is a physical register dependency and it's impossible or
     555             :       // expensive to copy the register. Make sure nothing that can
     556             :       // clobber the register is scheduled between the predecessor and
     557             :       // this node.
     558      131034 :       SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
     559             :       assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
     560             :              "interference on register dependence");
     561      131034 :       LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
     562      131034 :       if (!LiveRegGens[Pred.getReg()]) {
     563      127309 :         ++NumLiveRegs;
     564      254618 :         LiveRegGens[Pred.getReg()] = SU;
     565             :       }
     566             :     }
     567             :   }
     568             : 
     569             :   // If we're scheduling a lowered CALLSEQ_END, find the corresponding
     570             :   // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
     571             :   // these nodes, to prevent other calls from being interscheduled with them.
     572     4061640 :   unsigned CallResource = TRI->getNumRegs();
     573     8123280 :   if (!LiveRegDefs[CallResource])
     574     3325753 :     for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
     575     5635364 :       if (Node->isMachineOpcode() &&
     576     2275698 :           Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     577      193018 :         unsigned NestLevel = 0;
     578      193018 :         unsigned MaxNest = 0;
     579      193018 :         SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
     580             :         assert(N && "Must find call sequence start");
     581             : 
     582      386036 :         SUnit *Def = &SUnits[N->getNodeId()];
     583      386036 :         CallSeqEndForStart[Def] = SU;
     584             : 
     585      193018 :         ++NumLiveRegs;
     586      193018 :         LiveRegDefs[CallResource] = Def;
     587      193018 :         LiveRegGens[CallResource] = SU;
     588             :         break;
     589             :       }
     590     4061640 : }
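// --- Editor's sketch (illustration only, not part of this file) ------------
// A compact model of the LiveRegDefs/LiveRegGens invariant from the comment
// above, with a hypothetical Unit type: as a chain of two-address defs of one
// physical register is released bottom-up, Defs[Reg] keeps moving up to the
// furthest producer while Gens[Reg] stays pinned at the first consumer seen,
// so the whole chain behaves as one live range.
#include <vector>

namespace sketch {
struct Unit {};

struct LiveRegState {
  std::vector<Unit *> Defs, Gens; // indexed by physical register number
  explicit LiveRegState(unsigned NumRegs) : Defs(NumRegs), Gens(NumRegs) {}

  // Called when scheduled unit SU consumes Reg produced by Pred (bottom-up).
  void releaseRegDep(unsigned Reg, Unit *SU, Unit *Pred) {
    Defs[Reg] = Pred;   // always advance the def up the chain
    if (!Gens[Reg])
      Gens[Reg] = SU;   // but record only the first (nearest) consumer
  }
};
} // namespace sketch
// ----------------------------------------------------------------------------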
     591             : 
     592             : /// Check to see if any of the pending instructions are ready to issue.  If
     593             : /// so, add them to the available queue.
     594     3724607 : void ScheduleDAGRRList::ReleasePending() {
     595     3724607 :   if (DisableSchedCycles) {
     596             :     assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
     597             :     return;
     598             :   }
     599             : 
     600             :   // If the available queue is empty, it is safe to reset MinAvailableCycle.
     601     3724607 :   if (AvailableQueue->empty())
     602     1768836 :     MinAvailableCycle = std::numeric_limits<unsigned>::max();
     603             : 
     604             :   // Check to see if any of the pending instructions are ready to issue.  If
     605             :   // so, add them to the available queue.
     606     7449214 :   for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
     607           0 :     unsigned ReadyCycle = PendingQueue[i]->getHeight();
     608           0 :     if (ReadyCycle < MinAvailableCycle)
     609           0 :       MinAvailableCycle = ReadyCycle;
     610             : 
     611           0 :     if (PendingQueue[i]->isAvailable) {
     612           0 :       if (!isReady(PendingQueue[i]))
     613           0 :           continue;
     614           0 :       AvailableQueue->push(PendingQueue[i]);
     615             :     }
     616           0 :     PendingQueue[i]->isPending = false;
     617           0 :     PendingQueue[i] = PendingQueue.back();
     618             :     PendingQueue.pop_back();
     619           0 :     --i; --e;
     620             :   }
     621             : }
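// --- Editor's sketch (illustration only, not part of this file) ------------
// The loop above uses the swap-with-back idiom to erase from the middle of
// PendingQueue in O(1): overwrite the dead slot with the last element, pop
// the back, and step the indices back so the swapped-in element gets
// re-examined. The same idiom, standalone:
#include <cstddef>
#include <vector>

namespace sketch {
template <typename T, typename Predicate>
void unorderedEraseIf(std::vector<T> &V, Predicate ShouldErase) {
  for (std::size_t I = 0, E = V.size(); I != E;) {
    if (ShouldErase(V[I])) {
      V[I] = V.back();   // element order is not preserved
      V.pop_back();
      --E;               // re-check the element just swapped into slot I
    } else {
      ++I;
    }
  }
}
} // namespace sketch
// ----------------------------------------------------------------------------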
     622             : 
     623             : /// Move the scheduler state forward by the specified number of Cycles.
     624    10990802 : void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
     625    10990802 :   if (NextCycle <= CurCycle)
     626             :     return;
     627             : 
     628     3723766 :   IssueCount = 0;
     629     3723766 :   AvailableQueue->setCurCycle(NextCycle);
     630     3723766 :   if (!HazardRec->isEnabled()) {
     631             :     // Bypass lots of virtual calls in case of long latency.
     632     3677004 :     CurCycle = NextCycle;
     633             :   }
     634             :   else {
     635      269206 :     for (; CurCycle != NextCycle; ++CurCycle) {
     636      111222 :       HazardRec->RecedeCycle();
     637             :     }
     638             :   }
     639             :   // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
     640             :   // available Q to release pending nodes at least once before popping.
     641     3723766 :   ReleasePending();
     642             : }
     643             : 
     644             : /// Move the scheduler state forward until the specified node's dependents are
     645             : /// ready and can be scheduled with no resource conflicts.
     646     3751493 : void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
     647     3751493 :   if (DisableSchedCycles)
     648             :     return;
     649             : 
     650             :   // FIXME: Nodes such as CopyFromReg probably should not advance the current
     651             :   // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
     652             :   // has predecessors the cycle will be advanced when they are scheduled.
     653             :   // But given the crude nature of modeling latency though such nodes, we
     654             :   // currently need to treat these nodes like real instructions.
     655             :   // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
     656             : 
     657             :   unsigned ReadyCycle = SU->getHeight();
     658             : 
     659             :   // Bump CurCycle to account for latency. We assume the latency of other
     660             :   // available instructions may be hidden by the stall (not a full pipe stall).
     661             :   // This updates the hazard recognizer's cycle before reserving resources for
     662             :   // this instruction.
     663     3751492 :   AdvanceToCycle(ReadyCycle);
     664             : 
     665             :   // Calls are scheduled in their preceding cycle, so don't conflict with
     666             :   // hazards from instructions after the call. EmitNode will reset the
     667             :   // scoreboard state before emitting the call.
     668     3751491 :   if (SU->isCall)
     669             :     return;
     670             : 
     671             :   // FIXME: For resource conflicts in very long non-pipelined stages, we
     672             :   // should probably skip ahead here to avoid useless scoreboard checks.
     673             :   int Stalls = 0;
     674             :   while (true) {
     675             :     ScheduleHazardRecognizer::HazardType HT =
     676     3568272 :       HazardRec->getHazardType(SU, -Stalls);
     677             : 
     678     3568272 :     if (HT == ScheduleHazardRecognizer::NoHazard)
     679             :       break;
     680             : 
     681        9524 :     ++Stalls;
     682        9524 :   }
     683     3558748 :   AdvanceToCycle(CurCycle + Stalls);
     684             : }
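// --- Editor's sketch (illustration only, not part of this file) ------------
// The stall loop above probes the hazard recognizer at successively earlier
// offsets until the unit could issue without a hazard, and only then advances
// the cycle once by the accumulated stall count. The same probe-then-advance
// shape, standalone, with a hypothetical CanIssueAt predicate:
#include <functional>

namespace sketch {
inline unsigned advancePastStalls(unsigned CurCycle,
                                  const std::function<bool(int)> &CanIssueAt) {
  int Stalls = 0;
  while (!CanIssueAt(-Stalls))  // keep probing one more cycle of stall
    ++Stalls;
  return CurCycle + Stalls;     // advance once, by the full stall count
}
} // namespace sketch
// ----------------------------------------------------------------------------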
     685             : 
     686             : /// Record this SUnit in the HazardRecognizer.
     687             : /// Does not update CurCycle.
     688     3751601 : void ScheduleDAGRRList::EmitNode(SUnit *SU) {
     689     3751601 :   if (!HazardRec->isEnabled())
     690             :     return;
     691             : 
     692             :   // Check for phys reg copy.
     693       75537 :   if (!SU->getNode())
     694             :     return;
     695             : 
     696      151070 :   switch (SU->getNode()->getOpcode()) {
     697             :   default:
     698             :     assert(SU->getNode()->isMachineOpcode() &&
     699             :            "This target-independent node should not be scheduled.");
     700             :     break;
     701             :   case ISD::MERGE_VALUES:
     702             :   case ISD::TokenFactor:
     703             :   case ISD::LIFETIME_START:
     704             :   case ISD::LIFETIME_END:
     705             :   case ISD::CopyToReg:
     706             :   case ISD::CopyFromReg:
     707             :   case ISD::EH_LABEL:
     708             :     // Noops don't affect the scoreboard state. Copies are likely to be
     709             :     // removed.
     710             :     return;
     711         165 :   case ISD::INLINEASM:
     712             :     // For inline asm, clear the pipeline state.
     713         165 :     HazardRec->Reset();
     714             :     return;
     715             :   }
     716       53584 :   if (SU->isCall) {
     717             :     // Calls are scheduled with their preceding instructions. For bottom-up
     718             :     // scheduling, clear the pipeline state before emitting.
     719        1281 :     HazardRec->Reset();
     720             :   }
     721             : 
     722       53584 :   HazardRec->EmitInstruction(SU);
     723             : }
     724             : 
     725             : static void resetVRegCycle(SUnit *SU);
     726             : 
     727             : /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
     728             : /// count of its predecessors. If a predecessor pending count is zero, add it to
     729             : /// the Available queue.
     730     3751493 : void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
     731             :   DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
     732             :   DEBUG(SU->dump(this));
     733             : 
     734             : #ifndef NDEBUG
     735             :   if (CurCycle < SU->getHeight())
     736             :     DEBUG(dbgs() << "   Height [" << SU->getHeight()
     737             :           << "] pipeline stall!\n");
     738             : #endif
     739             : 
     740             :   // FIXME: Do not modify node height. It may interfere with
     741             :   // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
     742             :   // node its ready cycle can aid heuristics, and after scheduling it can
     743             :   // indicate the scheduled cycle.
     744     3751493 :   SU->setHeightToAtLeast(CurCycle);
     745             : 
     746             :   // Reserve resources for the scheduled instruction.
     747     3751492 :   EmitNode(SU);
     748             : 
     749     3751492 :   Sequence.push_back(SU);
     750             : 
     751     3751493 :   AvailableQueue->scheduledNode(SU);
     752             : 
     753             :   // If HazardRec is disabled, and each inst counts as one cycle, then
     754             :   // advance CurCycle before ReleasePredecessors to avoid useless pushes to
     755             :   // PendingQueue for schedulers that implement HasReadyFilter.
     756     7427557 :   if (!HazardRec->isEnabled() && AvgIPC < 2)
     757     3676064 :     AdvanceToCycle(CurCycle + 1);
     758             : 
     759             :   // Update liveness of predecessors before successors to avoid treating a
     760             :   // two-address node as a live range def.
     761     3751492 :   ReleasePredecessors(SU);
     762             : 
     763             :   // Release all the implicit physical register defs that are live.
     764    16963276 :   for (SDep &Succ : SU->Succs) {
     765             :     // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
     766      259924 :     if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
     767             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     768      126469 :       --NumLiveRegs;
     769      252938 :       LiveRegDefs[Succ.getReg()] = nullptr;
     770      126469 :       LiveRegGens[Succ.getReg()] = nullptr;
     771      126469 :       releaseInterferences(Succ.getReg());
     772             :     }
     773             :   }
     774             :   // Release the special call resource dependence, if this is the beginning
     775             :   // of a call.
     776     3751493 :   unsigned CallResource = TRI->getNumRegs();
     777     7502986 :   if (LiveRegDefs[CallResource] == SU)
     778      193022 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     779             :          SUNode = SUNode->getGluedNode()) {
     780      387765 :       if (SUNode->isMachineOpcode() &&
     781      193666 :           SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     782             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     783      193022 :         --NumLiveRegs;
     784      193022 :         LiveRegDefs[CallResource] = nullptr;
     785      193022 :         LiveRegGens[CallResource] = nullptr;
     786      193022 :         releaseInterferences(CallResource);
     787             :       }
     788             :     }
     789             : 
     790     3751493 :   resetVRegCycle(SU);
     791             : 
     792     3751493 :   SU->isScheduled = true;
     793             : 
     794             :   // Conditions under which the scheduler should eagerly advance the cycle:
     795             :   // (1) No available instructions
     796             :   // (2) All pipelines full, so available instructions must have hazards.
     797             :   //
     798             :   // If HazardRec is disabled, the cycle was pre-advanced before calling
     799             :   // ReleasePredecessors. In that case, IssueCount should remain 0.
     800             :   //
     801             :   // Check AvailableQueue after ReleasePredecessors in case of zero latency.
     802     7427557 :   if (HazardRec->isEnabled() || AvgIPC > 1) {
     803       75429 :     if (SU->getNode() && SU->getNode()->isMachineOpcode())
     804       53512 :       ++IssueCount;
     805      150858 :     if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
     806      146359 :         || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
     807        4499 :       AdvanceToCycle(CurCycle + 1);
     808             :   }
     809     3751493 : }
     810             : 
     811             : /// CapturePred - This does the opposite of ReleasePred. Since SU is being
     812             : /// unscheduled, increase the succ left count of its predecessors. Remove
     813             : /// them from AvailableQueue if necessary.
     814       12518 : void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
     815             :   SUnit *PredSU = PredEdge->getSUnit();
     816       12518 :   if (PredSU->isAvailable) {
     817        2870 :     PredSU->isAvailable = false;
     818        2870 :     if (!PredSU->isPending)
     819        1782 :       AvailableQueue->remove(PredSU);
     820             :   }
     821             : 
     822             :   assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
     823             :          "NumSuccsLeft will overflow!");
     824       12518 :   ++PredSU->NumSuccsLeft;
     825       12518 : }
     826             : 
     827             : /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
     828             : /// its predecessor states to reflect the change.
     829        8472 : void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
     830             :   DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
     831             :   DEBUG(SU->dump(this));
     832             : 
     833       41980 :   for (SDep &Pred : SU->Preds) {
     834       12518 :     CapturePred(&Pred);
     835        3562 :     if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
     836             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     837             :       assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
     838             :              "Physical register dependency violated?");
     839         840 :       --NumLiveRegs;
     840        1680 :       LiveRegDefs[Pred.getReg()] = nullptr;
     841         840 :       LiveRegGens[Pred.getReg()] = nullptr;
     842         840 :       releaseInterferences(Pred.getReg());
     843             :     }
     844             :   }
     845             : 
     846             :   // Reclaim the special call resource dependence, if this is the beginning
     847             :   // of a call.
     848        8472 :   unsigned CallResource = TRI->getNumRegs();
     849        8472 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     850             :        SUNode = SUNode->getGluedNode()) {
     851       18533 :     if (SUNode->isMachineOpcode() &&
     852        8340 :         SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     853           8 :       SUnit *SeqEnd = CallSeqEndForStart[SU];
     854             :       assert(SeqEnd && "Call sequence start/end must be known");
     855             :       assert(!LiveRegDefs[CallResource]);
     856             :       assert(!LiveRegGens[CallResource]);
     857           4 :       ++NumLiveRegs;
     858           8 :       LiveRegDefs[CallResource] = SU;
     859           4 :       LiveRegGens[CallResource] = SeqEnd;
     860             :     }
     861             :   }
     862             : 
     863             :   // Release the special call resource dependence, if this is the end
     864             :   // of a call.
     865       16944 :   if (LiveRegGens[CallResource] == SU)
     866           0 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     867             :          SUNode = SUNode->getGluedNode()) {
     868           0 :       if (SUNode->isMachineOpcode() &&
     869           0 :           SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     870             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     871             :         assert(LiveRegDefs[CallResource]);
     872             :         assert(LiveRegGens[CallResource]);
     873           0 :         --NumLiveRegs;
     874           0 :         LiveRegDefs[CallResource] = nullptr;
     875           0 :         LiveRegGens[CallResource] = nullptr;
     876           0 :         releaseInterferences(CallResource);
     877             :       }
     878             :     }
     879             : 
     880      140952 :   for (auto &Succ : SU->Succs) {
     881             :     if (Succ.isAssignedRegDep()) {
     882             :       auto Reg = Succ.getReg();
     883        1108 :       if (!LiveRegDefs[Reg])
     884           0 :         ++NumLiveRegs;
     885             :       // This becomes the nearest def. Note that an earlier def may still be
     886             :       // pending if this is a two-address node.
     887         554 :       LiveRegDefs[Reg] = SU;
     888             : 
      889             :       // Update LiveRegGens only if it was empty before this unscheduling.
      890             :       // This avoids incorrectly updating an entry set by a previous run.
     891         554 :       if (!LiveRegGens[Reg]) {
     892             :         // Find the successor with the lowest height.
     893           0 :         LiveRegGens[Reg] = Succ.getSUnit();
     894           0 :         for (auto &Succ2 : SU->Succs) {
     895           0 :           if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
     896           0 :               Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
     897           0 :             LiveRegGens[Reg] = Succ2.getSUnit();
     898             :         }
     899             :       }
     900             :     }
     901             :   }
     902       16944 :   if (SU->getHeight() < MinAvailableCycle)
     903       15862 :     MinAvailableCycle = SU->getHeight();
     904             : 
     905        8472 :   SU->setHeightDirty();
     906        8472 :   SU->isScheduled = false;
     907        8472 :   SU->isAvailable = true;
     908        8472 :   if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
     909             :     // Don't make available until backtracking is complete.
     910           0 :     SU->isPending = true;
     911           0 :     PendingQueue.push_back(SU);
     912             :   }
     913             :   else {
     914        8472 :     AvailableQueue->push(SU);
     915             :   }
     916        8472 :   AvailableQueue->unscheduledNode(SU);
     917        8472 : }
     918             : 
     919             : /// After backtracking, the hazard checker needs to be restored to a state
      920             : /// corresponding to the current cycle.
     921         840 : void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
     922         840 :   HazardRec->Reset();
     923             : 
     924        2520 :   unsigned LookAhead = std::min((unsigned)Sequence.size(),
     925        2520 :                                 HazardRec->getMaxLookAhead());
     926         840 :   if (LookAhead == 0)
     927             :     return;
     928             : 
     929             :   std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
     930           4 :   unsigned HazardCycle = (*I)->getHeight();
     931         112 :   for (auto E = Sequence.end(); I != E; ++I) {
     932         108 :     SUnit *SU = *I;
     933         288 :     for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
     934          90 :       HazardRec->RecedeCycle();
     935             :     }
     936         108 :     EmitNode(SU);
     937             :   }
     938             : }
     939             : 
      940             : /// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes until
      941             : /// reaching BtSU, in order to make it possible to schedule SU.
     942         840 : void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
     943         840 :   SUnit *OldSU = Sequence.back();
     944             :   while (true) {
     945             :     Sequence.pop_back();
     946             :     // FIXME: use ready cycle instead of height
     947        8472 :     CurCycle = OldSU->getHeight();
     948        8472 :     UnscheduleNodeBottomUp(OldSU);
     949        8472 :     AvailableQueue->setCurCycle(CurCycle);
     950        8472 :     if (OldSU == BtSU)
     951             :       break;
     952        7632 :     OldSU = Sequence.back();
     953             :   }
     954             : 
     955             :   assert(!SU->isSucc(OldSU) && "Something is wrong!");
     956             : 
     957         840 :   RestoreHazardCheckerBottomUp();
     958             : 
     959         840 :   ReleasePending();
     960             : 
     961             :   ++NumBacktracks;
     962         840 : }
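
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// The unwind loop above in miniature: pop nodes off the tail of the
// schedule, undoing each one, until the blocking node BtSU itself has been
// unscheduled. Unit is a hypothetical stand-in for SUnit:

#include <vector>

struct Unit { unsigned Height = 0; };

void backtrackTo(std::vector<Unit *> &Sequence, Unit *BtSU,
                 unsigned &CurCycle) {
  Unit *Old = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // Height approximates the ready cycle here, mirroring the FIXME above.
    CurCycle = Old->Height;
    // A real scheduler would also undo liveness and queue state for Old.
    if (Old == BtSU)
      break;
    Old = Sequence.back();
  }
}
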
     963             : 
     964        2338 : static bool isOperandOf(const SUnit *SU, SDNode *N) {
     965        2338 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     966             :        SUNode = SUNode->getGluedNode()) {
     967        2367 :     if (SUNode->isOperandOf(N))
     968             :       return true;
     969             :   }
     970             :   return false;
     971             : }
     972             : 
      973             : /// TryUnfoldSU - Attempt to unfold the memory operand folded into this node.
     974        6368 : SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     975        6368 :   SDNode *N = SU->getNode();
      976             :   // Early returns below keep the fall-through (successful unfold) path simple.
     977             :   SmallVector<SDNode *, 2> NewNodes;
     978        6368 :   if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
     979             :     return nullptr;
     980             : 
      981             :   // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
      982             :   // which can't be handled here, so quit.
     983        6368 :   if (NewNodes.size() == 3)
     984             :     return nullptr;
     985             : 
     986             :   assert(NewNodes.size() == 2 && "Expected a load folding node!");
     987             : 
     988        6357 :   N = NewNodes[1];
     989        6357 :   SDNode *LoadNode = NewNodes[0];
     990        6357 :   unsigned NumVals = N->getNumValues();
     991        6357 :   unsigned OldNumVals = SU->getNode()->getNumValues();
     992             : 
     993             :   // LoadNode may already exist. This can happen when there is another
      994             :   // load from the same location that produces the same type of value
      995             :   // but with different alignment or volatility.
     996             :   bool isNewLoad = true;
     997             :   SUnit *LoadSU;
     998        6357 :   if (LoadNode->getNodeId() != -1) {
     999           1 :     LoadSU = &SUnits[LoadNode->getNodeId()];
    1000             :     // If LoadSU has already been scheduled, we should clone it but
    1001             :     // this would negate the benefit to unfolding so just return SU.
    1002           1 :     if (LoadSU->isScheduled)
    1003             :       return SU;
    1004             :     isNewLoad = false;
    1005             :   } else {
    1006        6356 :     LoadSU = CreateNewSUnit(LoadNode);
    1007        6356 :     LoadNode->setNodeId(LoadSU->NodeNum);
    1008             : 
    1009        6356 :     InitNumRegDefsLeft(LoadSU);
    1010        6356 :     computeLatency(LoadSU);
    1011             :   }
    1012             : 
    1013             :   DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    1014             : 
     1015             :   // Now that we are committed to unfolding, replace DAG uses.
    1016       20188 :   for (unsigned i = 0; i != NumVals; ++i)
    1017       20748 :     DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    1018       19068 :   DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
    1019             :                                  SDValue(LoadNode, 1));
    1020             : 
    1021        6356 :   SUnit *NewSU = CreateNewSUnit(N);
    1022             :   assert(N->getNodeId() == -1 && "Node already inserted!");
    1023        6356 :   N->setNodeId(NewSU->NodeNum);
    1024             : 
    1025       12712 :   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    1026       49168 :   for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
    1027             :     if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
    1028         560 :       NewSU->isTwoAddress = true;
    1029         560 :       break;
    1030             :     }
    1031             :   }
    1032       12712 :   if (MCID.isCommutable())
    1033        5821 :     NewSU->isCommutable = true;
    1034             : 
    1035        6356 :   InitNumRegDefsLeft(NewSU);
    1036        6356 :   computeLatency(NewSU);
    1037             : 
    1038             :   // Record all the edges to and from the old SU, by category.
    1039             :   SmallVector<SDep, 4> ChainPreds;
    1040             :   SmallVector<SDep, 4> ChainSuccs;
    1041             :   SmallVector<SDep, 4> LoadPreds;
    1042             :   SmallVector<SDep, 4> NodePreds;
    1043             :   SmallVector<SDep, 4> NodeSuccs;
    1044       20892 :   for (SDep &Pred : SU->Preds) {
    1045        7268 :     if (Pred.isCtrl())
    1046        4930 :       ChainPreds.push_back(Pred);
    1047        2338 :     else if (isOperandOf(Pred.getSUnit(), LoadNode))
    1048        1760 :       LoadPreds.push_back(Pred);
    1049             :     else
    1050         578 :       NodePreds.push_back(Pred);
    1051             :   }
    1052       44818 :   for (SDep &Succ : SU->Succs) {
    1053       19231 :     if (Succ.isCtrl())
    1054        6515 :       ChainSuccs.push_back(Succ);
    1055             :     else
    1056       12716 :       NodeSuccs.push_back(Succ);
    1057             :   }
    1058             : 
    1059             :   // Now assign edges to the newly-created nodes.
    1060       16216 :   for (const SDep &Pred : ChainPreds) {
    1061        4930 :     RemovePred(SU, Pred);
    1062        4930 :     if (isNewLoad)
    1063        4930 :       AddPred(LoadSU, Pred);
    1064             :   }
    1065        9876 :   for (const SDep &Pred : LoadPreds) {
    1066        1760 :     RemovePred(SU, Pred);
    1067        1760 :     if (isNewLoad)
    1068        1760 :       AddPred(LoadSU, Pred);
    1069             :   }
    1070        7512 :   for (const SDep &Pred : NodePreds) {
    1071         578 :     RemovePred(SU, Pred);
    1072         578 :     AddPred(NewSU, Pred);
    1073             :   }
    1074       31788 :   for (SDep D : NodeSuccs) {
    1075             :     SUnit *SuccDep = D.getSUnit();
    1076             :     D.setSUnit(SU);
    1077       12716 :     RemovePred(SuccDep, D);
    1078             :     D.setSUnit(NewSU);
    1079       12716 :     AddPred(SuccDep, D);
    1080             :     // Balance register pressure.
    1081       12716 :     if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
    1082       12716 :         !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
    1083           0 :       --NewSU->NumRegDefsLeft;
    1084             :   }
    1085       19386 :   for (SDep D : ChainSuccs) {
    1086             :     SUnit *SuccDep = D.getSUnit();
    1087             :     D.setSUnit(SU);
    1088        6515 :     RemovePred(SuccDep, D);
    1089        6515 :     if (isNewLoad) {
    1090             :       D.setSUnit(LoadSU);
    1091        6515 :       AddPred(SuccDep, D);
    1092             :     }
    1093             :   }
    1094             : 
    1095             :   // Add a data dependency to reflect that NewSU reads the value defined
    1096             :   // by LoadSU.
    1097             :   SDep D(LoadSU, SDep::Data, 0);
    1098        6356 :   D.setLatency(LoadSU->Latency);
    1099        6356 :   AddPred(NewSU, D);
    1100             : 
    1101        6356 :   if (isNewLoad)
    1102        6356 :     AvailableQueue->addNode(LoadSU);
    1103        6356 :   AvailableQueue->addNode(NewSU);
    1104             : 
    1105             :   ++NumUnfolds;
    1106             : 
    1107        6356 :   if (NewSU->NumSuccsLeft == 0)
    1108          41 :     NewSU->isAvailable = true;
    1109             : 
    1110             :   return NewSU;
    1111             : }
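
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// Unfolding splits one SUnit into a load (LoadSU) plus the arithmetic op
// (NewSU), so the old unit's edges are partitioned as above: chain preds
// and load-operand preds move to the load, value preds to the new op. The
// partition step in isolation, over a hypothetical Edge type:

#include <vector>

struct Edge {
  bool IsCtrl = false;     // chain/control dependence
  bool FeedsLoad = false;  // operand consumed by the unfolded load
};

void partitionPreds(const std::vector<Edge> &Preds,
                    std::vector<Edge> &ChainPreds,
                    std::vector<Edge> &LoadPreds,
                    std::vector<Edge> &NodePreds) {
  for (const Edge &P : Preds) {
    if (P.IsCtrl)
      ChainPreds.push_back(P);  // chain deps belong on the load
    else if (P.FeedsLoad)
      LoadPreds.push_back(P);   // e.g. the address computation
    else
      NodePreds.push_back(P);   // value operands of the arithmetic op
  }
}
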
    1112             : 
    1113             : /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
    1114             : /// successors to the newly created node.
    1115       19768 : SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    1116       19768 :   SDNode *N = SU->getNode();
    1117       19768 :   if (!N)
    1118             :     return nullptr;
    1119             : 
    1120             :   DEBUG(dbgs() << "Considering duplicating the SU\n");
    1121             :   DEBUG(SU->dump(this));
    1122             : 
    1123          90 :   if (N->getGluedNode() &&
    1124          90 :       !TII->canCopyGluedNodeDuringSchedule(N)) {
    1125             :     DEBUG(dbgs()
    1126             :         << "Giving up because it has incoming glue and the target does not "
    1127             :            "want to copy it\n");
    1128             :     return nullptr;
    1129             :   }
    1130             : 
    1131             :   SUnit *NewSU;
    1132             :   bool TryUnfold = false;
    1133      104286 :   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    1134             :     MVT VT = N->getSimpleValueType(i);
    1135       32463 :     if (VT == MVT::Glue) {
    1136             :       DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
    1137             :       return nullptr;
    1138       32463 :     } else if (VT == MVT::Other)
    1139             :       TryUnfold = true;
    1140             :   }
    1141       90872 :   for (const SDValue &Op : N->op_values()) {
    1142       71192 :     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    1143       71192 :     if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
     1144             :       DEBUG(dbgs() << "Giving up because one of the operands is glue and "
    1145             :                       "the target does not want to copy it\n");
    1146             :       return nullptr;
    1147             :     }
    1148             :   }
    1149             : 
    1150             :   // If possible unfold instruction.
    1151       19680 :   if (TryUnfold) {
    1152        6368 :     SUnit *UnfoldSU = TryUnfoldSU(SU);
    1153        6368 :     if (!UnfoldSU)
    1154             :       return nullptr;
    1155             :     SU = UnfoldSU;
    1156             :     N = SU->getNode();
     1157             :     // If this can be scheduled, don't bother duplicating; just return it.
    1158        6357 :     if (SU->NumSuccsLeft == 0)
    1159             :       return SU;
    1160             :   }
    1161             : 
    1162             :   DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
    1163       19628 :   NewSU = CreateClone(SU);
    1164             : 
    1165             :   // New SUnit has the exact same predecessors.
    1166       66590 :   for (SDep &Pred : SU->Preds)
    1167             :     if (!Pred.isArtificial())
    1168       23481 :       AddPred(NewSU, Pred);
    1169             : 
    1170             :   // Only copy scheduled successors. Cut them from old node's successor
    1171             :   // list and move them over.
    1172             :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1173       98968 :   for (SDep &Succ : SU->Succs) {
    1174           0 :     if (Succ.isArtificial())
    1175             :       continue;
    1176             :     SUnit *SuccSU = Succ.getSUnit();
    1177       39670 :     if (SuccSU->isScheduled) {
    1178       19799 :       SDep D = Succ;
    1179             :       D.setSUnit(NewSU);
    1180       19799 :       AddPred(SuccSU, D);
    1181             :       D.setSUnit(SU);
    1182       19799 :       DelDeps.push_back(std::make_pair(SuccSU, D));
    1183             :     }
    1184             :   }
    1185       59226 :   for (auto &DelDep : DelDeps)
    1186       19799 :     RemovePred(DelDep.first, DelDep.second);
    1187             : 
    1188       19628 :   AvailableQueue->updateNode(SU);
    1189       19628 :   AvailableQueue->addNode(NewSU);
    1190             : 
    1191             :   ++NumDups;
    1192             :   return NewSU;
    1193             : }
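
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// Note the DelDeps pattern above: new edges to the clone are added while
// walking SU->Succs, but removals are collected first and applied in a
// second pass, so the list being iterated is never erased from mid-walk.
// The same pattern on hypothetical minimal types:

#include <utility>
#include <vector>

struct Unit;
struct Dep { Unit *Peer = nullptr; };

struct Unit {
  bool Scheduled = false;
  std::vector<Dep> Preds, Succs;
};

void moveScheduledSuccs(Unit *Old, Unit *Clone) {
  std::vector<std::pair<Unit *, Dep>> Deleted;
  for (Dep &S : Old->Succs) {
    if (!S.Peer->Scheduled)
      continue;                          // unscheduled users keep Old
    S.Peer->Preds.push_back({Clone});    // scheduled users now read Clone
    Deleted.push_back({S.Peer, {Old}});  // defer removal of the old edge
  }
  for (auto &D : Deleted) {
    auto &P = D.first->Preds;
    for (auto I = P.begin(), E = P.end(); I != E; ++I)
      if (I->Peer == D.second.Peer) {
        P.erase(I);
        break;
      }
  }
}
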
    1194             : 
    1195             : /// InsertCopiesAndMoveSuccs - Insert register copies and move all
    1196             : /// scheduled successors of the given SUnit to the last copy.
    1197          99 : void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
    1198             :                                               const TargetRegisterClass *DestRC,
    1199             :                                               const TargetRegisterClass *SrcRC,
    1200             :                                               SmallVectorImpl<SUnit*> &Copies) {
    1201          99 :   SUnit *CopyFromSU = CreateNewSUnit(nullptr);
    1202          99 :   CopyFromSU->CopySrcRC = SrcRC;
    1203          99 :   CopyFromSU->CopyDstRC = DestRC;
    1204             : 
    1205          99 :   SUnit *CopyToSU = CreateNewSUnit(nullptr);
    1206          99 :   CopyToSU->CopySrcRC = DestRC;
    1207          99 :   CopyToSU->CopyDstRC = SrcRC;
    1208             : 
    1209             :   // Only copy scheduled successors. Cut them from old node's successor
    1210             :   // list and move them over.
    1211             :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1212        1035 :   for (SDep &Succ : SU->Succs) {
    1213           0 :     if (Succ.isArtificial())
    1214             :       continue;
    1215             :     SUnit *SuccSU = Succ.getSUnit();
    1216         468 :     if (SuccSU->isScheduled) {
    1217         262 :       SDep D = Succ;
    1218         262 :       D.setSUnit(CopyToSU);
    1219         262 :       AddPred(SuccSU, D);
    1220         262 :       DelDeps.push_back(std::make_pair(SuccSU, Succ));
    1221             :     }
    1222             :     else {
    1223             :       // Avoid scheduling the def-side copy before other successors. Otherwise
    1224             :       // we could introduce another physreg interference on the copy and
    1225             :       // continue inserting copies indefinitely.
    1226         412 :       AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    1227             :     }
    1228             :   }
    1229         623 :   for (auto &DelDep : DelDeps)
    1230         262 :     RemovePred(DelDep.first, DelDep.second);
    1231             : 
    1232             :   SDep FromDep(SU, SDep::Data, Reg);
    1233          99 :   FromDep.setLatency(SU->Latency);
    1234          99 :   AddPred(CopyFromSU, FromDep);
    1235          99 :   SDep ToDep(CopyFromSU, SDep::Data, 0);
    1236          99 :   ToDep.setLatency(CopyFromSU->Latency);
    1237          99 :   AddPred(CopyToSU, ToDep);
    1238             : 
    1239          99 :   AvailableQueue->updateNode(SU);
    1240          99 :   AvailableQueue->addNode(CopyFromSU);
    1241          99 :   AvailableQueue->addNode(CopyToSU);
    1242          99 :   Copies.push_back(CopyFromSU);
    1243          99 :   Copies.push_back(CopyToSU);
    1244             : 
    1245             :   ++NumPRCopies;
    1246          99 : }
    1247             : 
    1248             : /// getPhysicalRegisterVT - Returns the ValueType of the physical register
    1249             : /// definition of the specified node.
    1250             : /// FIXME: Move to SelectionDAG?
    1251       19768 : static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
    1252             :                                  const TargetInstrInfo *TII) {
    1253             :   unsigned NumRes;
    1254       39536 :   if (N->getOpcode() == ISD::CopyFromReg) {
    1255             :     // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    1256             :     NumRes = 1;
    1257             :   } else {
    1258       19714 :     const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    1259             :     assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    1260       19714 :     NumRes = MCID.getNumDefs();
    1261       19716 :     for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    1262       19716 :       if (Reg == *ImpDef)
    1263             :         break;
    1264           2 :       ++NumRes;
    1265             :     }
    1266             :   }
    1267       19768 :   return N->getSimpleValueType(NumRes);
    1268             : }
    1269             : 
     1270             : /// CheckForLiveRegDef - Record in LRegs any "live" registers that the
     1271             : /// specified register def of the specified SUnit would clobber.
    1272      713713 : static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
    1273             :                                SUnit **LiveRegDefs,
    1274             :                                SmallSet<unsigned, 4> &RegAdded,
    1275             :                                SmallVectorImpl<unsigned> &LRegs,
    1276             :                                const TargetRegisterInfo *TRI) {
    1277     3972616 :   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
    1278             : 
     1279             :     // Check if the alias is live.
    1280     1272595 :     if (!LiveRegDefs[*AliasI]) continue;
    1281             : 
    1282             :     // Allow multiple uses of the same def.
    1283      153286 :     if (LiveRegDefs[*AliasI] == SU) continue;
    1284             : 
    1285             :     // Add Reg to the set of interfering live regs.
    1286       42710 :     if (RegAdded.insert(*AliasI).second) {
    1287       56700 :       LRegs.push_back(*AliasI);
    1288             :     }
    1289             :   }
    1290      713713 : }
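
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// The check above in miniature. LLVM enumerates aliases through
// MCRegAliasIterator (which includes Reg itself); an explicit alias table
// stands in for that here, and plain std containers stand in for
// SmallSet/SmallVector:

#include <set>
#include <vector>

struct SU;  // hypothetical stand-in for SUnit; only identity matters here

void checkLiveRegDef(const SU *Def, unsigned Reg,
                     const std::vector<std::vector<unsigned>> &Aliases,
                     const std::vector<const SU *> &LiveRegDefs,
                     std::set<unsigned> &RegAdded,
                     std::vector<unsigned> &LRegs) {
  for (unsigned A : Aliases[Reg]) {  // Aliases[Reg] includes Reg itself
    if (!LiveRegDefs[A])
      continue;                      // alias not currently live
    if (LiveRegDefs[A] == Def)
      continue;                      // multiple uses of the same def are OK
    if (RegAdded.insert(A).second)   // report each interference only once
      LRegs.push_back(A);
  }
}
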
    1291             : 
    1292             : /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
    1293             : /// by RegMask, and add them to LRegs.
    1294        1061 : static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
    1295             :                                      ArrayRef<SUnit*> LiveRegDefs,
    1296             :                                      SmallSet<unsigned, 4> &RegAdded,
    1297             :                                      SmallVectorImpl<unsigned> &LRegs) {
    1298             :   // Look at all live registers. Skip Reg0 and the special CallResource.
    1299      277648 :   for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    1300      553174 :     if (!LiveRegDefs[i]) continue;
    1301         186 :     if (LiveRegDefs[i] == SU) continue;
    1302         186 :     if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    1303         186 :     if (RegAdded.insert(i).second)
    1304           7 :       LRegs.push_back(i);
    1305             :   }
    1306        1061 : }
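
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// A register mask is a packed bit-per-register set attached to calls. In
// LLVM's convention a *set* bit means the register is preserved, so
// MachineOperand::clobbersPhysReg reports registers whose bit is clear.
// An equivalent standalone bit test:

#include <cstdint>

bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  // Bit set => preserved across the call; bit clear => clobbered.
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}
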
    1307             : 
    1308             : /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
    1309             : static const uint32_t *getNodeRegMask(const SDNode *N) {
    1310     4546667 :   for (const SDValue &Op : N->op_values())
    1311     3774329 :     if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
    1312        1065 :       return RegOp->getRegMask();
    1313             :   return nullptr;
    1314             : }
    1315             : 
    1316             : /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
    1317             : /// scheduling of the given node to satisfy live physical register dependencies.
     1318             : /// If the specified node is the last one that's available to schedule, do
    1319             : /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
    1320     3760954 : bool ScheduleDAGRRList::
    1321             : DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
    1322     3760954 :   if (NumLiveRegs == 0)
    1323             :     return false;
    1324             : 
    1325      943999 :   SmallSet<unsigned, 4> RegAdded;
    1326             :   // If this node would clobber any "live" register, then it's not ready.
    1327             :   //
    1328             :   // If SU is the currently live definition of the same register that it uses,
    1329             :   // then we are free to schedule it.
    1330     3061241 :   for (SDep &Pred : SU->Preds) {
    1331       40986 :     if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
    1332       36894 :       CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
    1333             :                          RegAdded, LRegs, TRI);
    1334             :   }
    1335             : 
    1336      943999 :   for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    1337     1941968 :     if (Node->getOpcode() == ISD::INLINEASM) {
    1338             :       // Inline asm can clobber physical defs.
    1339           3 :       unsigned NumOps = Node->getNumOperands();
    1340           6 :       if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
    1341             :         --NumOps;  // Ignore the glue operand.
    1342             : 
    1343          11 :       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    1344             :         unsigned Flags =
    1345          24 :           cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    1346             :         unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    1347             : 
    1348           8 :         ++i; // Skip the ID value.
    1349           5 :         if (InlineAsm::isRegDefKind(Flags) ||
    1350          13 :             InlineAsm::isRegDefEarlyClobberKind(Flags) ||
    1351             :             InlineAsm::isClobberKind(Flags)) {
    1352             :           // Check for def of register or earlyclobber register.
    1353          18 :           for (; NumVals; --NumVals, ++i) {
    1354          12 :             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
    1355           6 :             if (TargetRegisterInfo::isPhysicalRegister(Reg))
    1356           6 :               CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1357             :           }
    1358             :         } else
    1359           2 :           i += NumVals;
    1360             :       }
    1361           3 :       continue;
    1362             :     }
    1363             : 
    1364      970981 :     if (!Node->isMachineOpcode())
    1365      197586 :       continue;
    1366             :     // If we're in the middle of scheduling a call, don't begin scheduling
    1367             :     // another call. Also, don't allow any physical registers to be live across
    1368             :     // the call.
    1369      773395 :     if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
    1370             :       // Check the special calling-sequence resource.
    1371        1063 :       unsigned CallResource = TRI->getNumRegs();
    1372        2126 :       if (LiveRegDefs[CallResource]) {
    1373         878 :         SDNode *Gen = LiveRegGens[CallResource]->getNode();
    1374             :         while (SDNode *Glued = Gen->getGluedNode())
    1375             :           Gen = Glued;
    1376        1752 :         if (!IsChainDependent(Gen, Node, 0, TII) &&
    1377         874 :             RegAdded.insert(CallResource).second)
    1378         874 :           LRegs.push_back(CallResource);
    1379             :       }
    1380             :     }
    1381      773395 :     if (const uint32_t *RegMask = getNodeRegMask(Node))
    1382        2122 :       CheckForLiveRegDefMasked(SU, RegMask,
    1383        1061 :                                makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
    1384             :                                RegAdded, LRegs);
    1385             : 
    1386     1546790 :     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    1387     1546790 :     if (MCID.hasOptionalDef()) {
    1388             :       // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
    1389             :       // This operand can be either a def of CPSR, if the S bit is set; or a use
    1390             :       // of %noreg.  When the OptionalDef is set to a valid register, we need to
    1391             :       // handle it in the same way as an ImplicitDef.
    1392       17631 :       for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
    1393        3683 :         if (MCID.OpInfo[i].isOptionalDef()) {
    1394         784 :           const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
    1395         392 :           unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
    1396         784 :           CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1397             :         }
    1398             :     }
    1399      773395 :     if (!MCID.ImplicitDefs)
    1400      438349 :       continue;
    1401     1724788 :     for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
    1402     1389742 :       CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1403             :   }
    1404             : 
    1405      943999 :   return !LRegs.empty();
    1406             : }
    1407             : 
    1408      320331 : void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
    1409             :   // Add the nodes that aren't ready back onto the available list.
    1410      320331 :   for (unsigned i = Interferences.size(); i > 0; --i) {
    1411       58462 :     SUnit *SU = Interferences[i-1];
    1412       29231 :     LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    1413       29231 :     if (Reg) {
    1414             :       SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
    1415       29231 :       if (!is_contained(LRegs, Reg))
    1416           4 :         continue;
    1417             :     }
    1418       29227 :     SU->isPending = false;
    1419             :     // The interfering node may no longer be available due to backtracking.
    1420             :     // Furthermore, it may have been made available again, in which case it is
    1421             :     // now already in the AvailableQueue.
    1422       29227 :     if (SU->isAvailable && !SU->NodeQueueId) {
    1423             :       DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
    1424        8373 :       AvailableQueue->push(SU);
    1425             :     }
    1426       58454 :     if (i < Interferences.size())
    1427           0 :       Interferences[i-1] = Interferences.back();
    1428             :     Interferences.pop_back();
    1429             :     LRegsMap.erase(LRegsPos);
    1430             :   }
    1431      320331 : }
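
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// The release loop above walks the vector backwards and erases via
// swap-with-back, so each removal is O(1) and the indices still to be
// visited stay valid. The idiom in generic form:

#include <vector>

template <typename T, typename Pred>
void swapAndPopEraseIf(std::vector<T> &V, Pred ShouldErase) {
  for (unsigned i = V.size(); i > 0; --i) {
    if (!ShouldErase(V[i - 1]))
      continue;
    if (i < V.size())
      V[i - 1] = V.back();  // fill the hole with the already-visited tail
    V.pop_back();
  }
}
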
    1432             : 
    1433             : /// Return a node that can be scheduled in this cycle. Requirements:
    1434             : /// (1) Ready: latency has been satisfied
    1435             : /// (2) No Hazards: resources are available
    1436             : /// (3) No Interferences: may unschedule to break register interferences.
    1437     3751493 : SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
    1438     3751493 :   SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
    1439     3752333 :   auto FindAvailableNode = [&]() {
    1440     3898474 :     while (CurSU) {
    1441             :       SmallVector<unsigned, 4> LRegs;
    1442     3819410 :       if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
    1443             :         break;
    1444             :       DEBUG(dbgs() << "    Interfering reg ";
    1445             :             if (LRegs[0] == TRI->getNumRegs())
    1446             :               dbgs() << "CallResource";
    1447             :             else
    1448             :               dbgs() << printReg(LRegs[0], TRI);
    1449             :             dbgs() << " SU #" << CurSU->NodeNum << '\n');
    1450             :       std::pair<LRegsMapT::iterator, bool> LRegsPair =
    1451       87687 :         LRegsMap.insert(std::make_pair(CurSU, LRegs));
    1452       29229 :       if (LRegsPair.second) {
    1453       29227 :         CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    1454       58454 :         Interferences.push_back(CurSU);
    1455             :       }
    1456             :       else {
    1457             :         assert(CurSU->isPending && "Interferences are pending");
    1458             :         // Update the interference with current live regs.
    1459           2 :         LRegsPair.first->second = LRegs;
    1460             :       }
    1461       58458 :       CurSU = AvailableQueue->pop();
    1462             :     }
    1463     7503826 :   };
    1464     3751493 :   FindAvailableNode();
    1465     3751493 :   if (CurSU)
    1466             :     return CurSU;
    1467             : 
    1468             :   // All candidates are delayed due to live physical reg dependencies.
    1469             :   // Try backtracking, code duplication, or inserting cross class copies
    1470             :   // to resolve it.
    1471       66540 :   for (SUnit *TrySU : Interferences) {
    1472       23806 :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1473             : 
    1474             :     // Try unscheduling up to the point where it's safe to schedule
    1475             :     // this node.
    1476             :     SUnit *BtSU = nullptr;
    1477             :     unsigned LiveCycle = std::numeric_limits<unsigned>::max();
    1478       71418 :     for (unsigned Reg : LRegs) {
    1479       71418 :       if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
    1480       23806 :         BtSU = LiveRegGens[Reg];
    1481             :         LiveCycle = BtSU->getHeight();
    1482             :       }
    1483             :     }
    1484       47612 :     if (!WillCreateCycle(TrySU, BtSU))  {
    1485             :       // BacktrackBottomUp mutates Interferences!
    1486         840 :       BacktrackBottomUp(TrySU, BtSU);
    1487             : 
    1488             :       // Force the current node to be scheduled before the node that
    1489             :       // requires the physical reg dep.
    1490         840 :       if (BtSU->isAvailable) {
    1491         840 :         BtSU->isAvailable = false;
    1492         840 :         if (!BtSU->isPending)
    1493         840 :           AvailableQueue->remove(BtSU);
    1494             :       }
    1495             :       DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
    1496             :             << TrySU->NodeNum << ")\n");
    1497         840 :       AddPred(TrySU, SDep(BtSU, SDep::Artificial));
    1498             : 
    1499             :       // If one or more successors has been unscheduled, then the current
    1500             :       // node is no longer available.
    1501         840 :       if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
    1502             :         DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
    1503         241 :         CurSU = AvailableQueue->pop();
    1504             :       } else {
    1505             :         DEBUG(dbgs() << "TrySU available\n");
    1506             :         // Available and in AvailableQueue
    1507         599 :         AvailableQueue->remove(TrySU);
    1508         599 :         CurSU = TrySU;
    1509             :       }
    1510         840 :       FindAvailableNode();
    1511             :       // Interferences has been mutated. We must break.
    1512         840 :       break;
    1513             :     }
    1514             :   }
    1515             : 
    1516       20608 :   if (!CurSU) {
     1517             :     // Can't backtrack. If it's too expensive to copy the value, then try to
     1518             :     // duplicate the nodes that produce these "too expensive to copy"
     1519             :     // values to break the dependency. If even that doesn't work,
     1520             :     // insert cross-class copies.
    1521             :     // If it's not too expensive, i.e. cost != -1, issue copies.
    1522       19768 :     SUnit *TrySU = Interferences[0];
    1523       19768 :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1524             :     assert(LRegs.size() == 1 && "Can't handle this yet!");
    1525       19768 :     unsigned Reg = LRegs[0];
    1526       39536 :     SUnit *LRDef = LiveRegDefs[Reg];
    1527       19768 :     MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    1528             :     const TargetRegisterClass *RC =
    1529       19768 :       TRI->getMinimalPhysRegClass(Reg, VT);
    1530       19768 :     const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
    1531             : 
     1532             :     // If the cross-copy register class is the same as RC, then it must be
     1533             :     // possible to copy the value directly. Do not try to duplicate the def.
     1534             :     // If the cross-copy register class is not the same as RC, then copying
     1535             :     // the value is possible, but it requires cross-register-class copies and
     1536             :     // is expensive.
     1537             :     // If the cross-copy register class is null, then it's not possible to
     1538             :     // copy the value at all.
    1539             :     SUnit *NewDef = nullptr;
    1540       19768 :     if (DestRC != RC) {
    1541       19768 :       NewDef = CopyAndMoveSuccessors(LRDef);
    1542       19768 :       if (!DestRC && !NewDef)
    1543           0 :         report_fatal_error("Can't handle live physical register dependency!");
    1544             :     }
    1545       19768 :     if (!NewDef) {
    1546             :       // Issue copies, these can be expensive cross register class copies.
    1547             :       SmallVector<SUnit*, 2> Copies;
    1548          99 :       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
    1549             :       DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
    1550             :             << " to SU #" << Copies.front()->NodeNum << "\n");
    1551         198 :       AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
    1552          99 :       NewDef = Copies.back();
    1553             :     }
    1554             : 
    1555             :     DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
    1556             :           << " to SU #" << TrySU->NodeNum << "\n");
    1557       19768 :     LiveRegDefs[Reg] = NewDef;
    1558       39536 :     AddPred(NewDef, SDep(TrySU, SDep::Artificial));
    1559       19768 :     TrySU->isAvailable = false;
    1560       19768 :     CurSU = NewDef;
    1561             :   }
    1562             :   assert(CurSU && "Unable to resolve live physical register dependencies!");
    1563       20608 :   return CurSU;
    1564             : }
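
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// The interference handling above boils down to a three-tier fallback:
// backtrack when that cannot create a dependency cycle, otherwise clone
// the defining node, otherwise route the value through cross-class
// copies. The policy shape only, with hypothetical boolean inputs
// standing in for WillCreateCycle() and a successful
// CopyAndMoveSuccessors():

enum class Fix { Backtrack, Duplicate, CrossClassCopy };

Fix chooseInterferenceFix(bool BacktrackWouldCycle, bool CanDuplicateDef) {
  if (!BacktrackWouldCycle)
    return Fix::Backtrack;     // cheapest: undo part of the schedule
  if (CanDuplicateDef)
    return Fix::Duplicate;     // clone the def to break the dependency
  return Fix::CrossClassCopy;  // last resort: expensive copies
}
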
    1565             : 
    1566             : /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
    1567             : /// schedulers.
    1568      310148 : void ScheduleDAGRRList::ListScheduleBottomUp() {
    1569             :   // Release any predecessors of the special Exit node.
    1570      310148 :   ReleasePredecessors(&ExitSU);
    1571             : 
    1572             :   // Add root to Available queue.
    1573      310148 :   if (!SUnits.empty()) {
    1574      307267 :     SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    1575             :     assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    1576      307267 :     RootSU->isAvailable = true;
    1577      307267 :     AvailableQueue->push(RootSU);
    1578             :   }
    1579             : 
    1580             :   // While Available queue is not empty, grab the node with the highest
    1581             :   // priority. If it is not ready put it back.  Schedule the node.
    1582      620296 :   Sequence.reserve(SUnits.size());
    1583     4061640 :   while (!AvailableQueue->empty() || !Interferences.empty()) {
    1584             :     DEBUG(dbgs() << "\nExamining Available:\n";
    1585             :           AvailableQueue->dump(this));
    1586             : 
    1587             :     // Pick the best node to schedule taking all constraints into
    1588             :     // consideration.
    1589     3751493 :     SUnit *SU = PickNodeToScheduleBottomUp();
    1590             : 
    1591     3751492 :     AdvancePastStalls(SU);
    1592             : 
    1593     3751492 :     ScheduleNodeBottomUp(SU);
    1594             : 
    1595     4070417 :     while (AvailableQueue->empty() && !PendingQueue.empty()) {
    1596             :       // Advance the cycle to free resources. Skip ahead to the next ready SU.
    1597             :       assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
    1598             :              "MinAvailableCycle uninitialized");
    1599           0 :       AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    1600             :     }
    1601             :   }
    1602             : 
    1603             :   // Reverse the order if it is bottom up.
    1604             :   std::reverse(Sequence.begin(), Sequence.end());
    1605             : 
    1606             : #ifndef NDEBUG
    1607             :   VerifyScheduledSequence(/*isBottomUp=*/true);
    1608             : #endif
    1609      310148 : }
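
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// The driver above reduced to its skeleton: seed the queue with the graph
// root, repeatedly pick and emit the best ready node, then reverse, since
// bottom-up scheduling produces the sequence back to front. Hypothetical
// minimal types; the real picker also checks hazards and physical-register
// interference before committing to a node:

#include <algorithm>
#include <vector>

struct Unit;

struct ReadyQueue {
  std::vector<Unit *> Q;
  bool empty() const { return Q.empty(); }
  void push(Unit *U) { Q.push_back(U); }
  Unit *pop() { Unit *U = Q.back(); Q.pop_back(); return U; }  // no priority
};

std::vector<Unit *> listScheduleBottomUp(ReadyQueue &Available) {
  std::vector<Unit *> Sequence;
  while (!Available.empty()) {
    Unit *SU = Available.pop();
    Sequence.push_back(SU);  // releasing SU's predecessors would happen here
  }
  std::reverse(Sequence.begin(), Sequence.end());
  return Sequence;
}
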
    1610             : 
    1611             : namespace {
    1612             : 
    1613             : class RegReductionPQBase;
    1614             : 
    1615             : struct queue_sort {
    1616             :   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
    1617             : };
    1618             : 
    1619             : #ifndef NDEBUG
    1620             : template<class SF>
    1621             : struct reverse_sort : public queue_sort {
    1622             :   SF &SortFunc;
    1623             : 
    1624             :   reverse_sort(SF &sf) : SortFunc(sf) {}
    1625             : 
    1626             :   bool operator()(SUnit* left, SUnit* right) const {
    1627             :     // reverse left/right rather than simply !SortFunc(left, right)
    1628             :     // to expose different paths in the comparison logic.
    1629             :     return SortFunc(right, left);
    1630             :   }
    1631             : };
    1632             : #endif // NDEBUG
    1633             : 
    1634             : /// bu_ls_rr_sort - Priority function for bottom up register pressure
    1635             : // reduction scheduler.
    1636             : struct bu_ls_rr_sort : public queue_sort {
    1637             :   enum {
    1638             :     IsBottomUp = true,
    1639             :     HasReadyFilter = false
    1640             :   };
    1641             : 
    1642             :   RegReductionPQBase *SPQ;
    1643             : 
    1644        5251 :   bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1645             : 
    1646             :   bool operator()(SUnit* left, SUnit* right) const;
    1647             : };
    1648             : 
    1649             : // src_ls_rr_sort - Priority function for source order scheduler.
    1650             : struct src_ls_rr_sort : public queue_sort {
    1651             :   enum {
    1652             :     IsBottomUp = true,
    1653             :     HasReadyFilter = false
    1654             :   };
    1655             : 
    1656             :   RegReductionPQBase *SPQ;
    1657             : 
    1658      276944 :   src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1659             : 
    1660             :   bool operator()(SUnit* left, SUnit* right) const;
    1661             : };
    1662             : 
    1663             : // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
    1664             : struct hybrid_ls_rr_sort : public queue_sort {
    1665             :   enum {
    1666             :     IsBottomUp = true,
    1667             :     HasReadyFilter = false
    1668             :   };
    1669             : 
    1670             :   RegReductionPQBase *SPQ;
    1671             : 
    1672       12513 :   hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1673             : 
    1674             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1675             : 
    1676             :   bool operator()(SUnit* left, SUnit* right) const;
    1677             : };
    1678             : 
    1679             : // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
    1680             : // scheduler.
    1681             : struct ilp_ls_rr_sort : public queue_sort {
    1682             :   enum {
    1683             :     IsBottomUp = true,
    1684             :     HasReadyFilter = false
    1685             :   };
    1686             : 
    1687             :   RegReductionPQBase *SPQ;
    1688             : 
    1689       15440 :   ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1690             : 
    1691             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1692             : 
    1693             :   bool operator()(SUnit* left, SUnit* right) const;
    1694             : };
    1695             : 
    1696      620294 : class RegReductionPQBase : public SchedulingPriorityQueue {
    1697             : protected:
    1698             :   std::vector<SUnit *> Queue;
    1699             :   unsigned CurQueueId = 0;
    1700             :   bool TracksRegPressure;
    1701             :   bool SrcOrder;
    1702             : 
    1703             :   // SUnits - The SUnits for the current graph.
    1704             :   std::vector<SUnit> *SUnits;
    1705             : 
    1706             :   MachineFunction &MF;
    1707             :   const TargetInstrInfo *TII;
    1708             :   const TargetRegisterInfo *TRI;
    1709             :   const TargetLowering *TLI;
    1710             :   ScheduleDAGRRList *scheduleDAG = nullptr;
    1711             : 
    1712             :   // SethiUllmanNumbers - The SethiUllman number for each node.
    1713             :   std::vector<unsigned> SethiUllmanNumbers;
    1714             : 
    1715             :   /// RegPressure - Tracking current reg pressure per register class.
    1716             :   std::vector<unsigned> RegPressure;
    1717             : 
    1718             :   /// RegLimit - Tracking the number of allocatable registers per register
    1719             :   /// class.
    1720             :   std::vector<unsigned> RegLimit;
    1721             : 
    1722             : public:
    1723      310148 :   RegReductionPQBase(MachineFunction &mf,
    1724             :                      bool hasReadyFilter,
    1725             :                      bool tracksrp,
    1726             :                      bool srcorder,
    1727             :                      const TargetInstrInfo *tii,
    1728             :                      const TargetRegisterInfo *tri,
    1729             :                      const TargetLowering *tli)
    1730      310148 :     : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
    1731     1240592 :       SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    1732      310148 :     if (TracksRegPressure) {
    1733             :       unsigned NumRC = TRI->getNumRegClasses();
    1734       27953 :       RegLimit.resize(NumRC);
    1735       27953 :       RegPressure.resize(NumRC);
    1736             :       std::fill(RegLimit.begin(), RegLimit.end(), 0);
    1737             :       std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1738     4508224 :       for (const TargetRegisterClass *RC : TRI->regclasses())
    1739     6678477 :         RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    1740             :     }
    1741      310148 :   }
    1742             : 
    1743             :   void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    1744      310147 :     scheduleDAG = scheduleDag;
    1745             :   }
    1746             : 
    1747             :   ScheduleHazardRecognizer* getHazardRec() {
    1748     6307285 :     return scheduleDAG->getHazardRec();
    1749             :   }
    1750             : 
    1751             :   void initNodes(std::vector<SUnit> &sunits) override;
    1752             : 
    1753             :   void addNode(const SUnit *SU) override;
    1754             : 
    1755             :   void updateNode(const SUnit *SU) override;
    1756             : 
    1757      310148 :   void releaseState() override {
    1758      310148 :     SUnits = nullptr;
    1759             :     SethiUllmanNumbers.clear();
    1760             :     std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1761      310148 :   }
    1762             : 
    1763             :   unsigned getNodePriority(const SUnit *SU) const;
    1764             : 
    1765             :   unsigned getNodeOrdering(const SUnit *SU) const {
    1766    24586866 :     if (!SU->getNode()) return 0;
    1767             : 
    1768    24586732 :     return SU->getNode()->getIROrder();
    1769             :   }
    1770             : 
    1771    30578468 :   bool empty() const override { return Queue.empty(); }
    1772             : 
    1773     3763576 :   void push(SUnit *U) override {
    1774             :     assert(!U->NodeQueueId && "Node in the queue already");
    1775     3763576 :     U->NodeQueueId = ++CurQueueId;
    1776     3763576 :     Queue.push_back(U);
    1777     3763576 :   }
    1778             : 
    1779        3221 :   void remove(SUnit *SU) override {
    1780             :     assert(!Queue.empty() && "Queue is empty!");
    1781             :     assert(SU->NodeQueueId != 0 && "Not in queue!");
    1782             :     std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    1783        3221 :     if (I != std::prev(Queue.end()))
    1784             :       std::swap(*I, Queue.back());
    1785             :     Queue.pop_back();
    1786        3221 :     SU->NodeQueueId = 0;
    1787        3221 :   }
    1788             : 
    1789       12716 :   bool tracksRegPressure() const override { return TracksRegPressure; }
    1790             : 
    1791             :   void dumpRegPressure() const;
    1792             : 
    1793             :   bool HighRegPressure(const SUnit *SU) const;
    1794             : 
    1795             :   bool MayReduceRegPressure(SUnit *SU) const;
    1796             : 
    1797             :   int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
    1798             : 
    1799             :   void scheduledNode(SUnit *SU) override;
    1800             : 
    1801             :   void unscheduledNode(SUnit *SU) override;
    1802             : 
    1803             : protected:
    1804             :   bool canClobber(const SUnit *SU, const SUnit *Op);
    1805             :   void AddPseudoTwoAddrDeps();
    1806             :   void PrescheduleNodesWithMultipleUses();
    1807             :   void CalculateSethiUllmanNumbers();
    1808             : };
    1809             : 
    1810             : template<class SF>
    1811     3760355 : static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
    1812             :   std::vector<SUnit *>::iterator Best = Q.begin();
    1813    16457818 :   for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    1814    12719446 :     if (Picker(*Best, *I))
    1815             :       Best = I;
    1816     3760355 :   SUnit *V = *Best;
    1817     3760355 :   if (Best != std::prev(Q.end()))
    1818             :     std::swap(*Best, Q.back());
    1819             :   Q.pop_back();
    1820     3760355 :   return V;
    1821             : }
    1822             : 
    1823             : template<class SF>
    1824             : SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
    1825             : #ifndef NDEBUG
    1826             :   if (DAG->StressSched) {
    1827             :     reverse_sort<SF> RPicker(Picker);
    1828             :     return popFromQueueImpl(Q, RPicker);
    1829             :   }
    1830             : #endif
    1831             :   (void)DAG;
    1832     3760355 :   return popFromQueueImpl(Q, Picker);
    1833             : }
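
// --- Editorial sketch; not part of ScheduleDAGRRList.cpp. ---
// popFromQueueImpl in generic form: one linear scan with the picker as a
// strict "right beats left" comparator, then swap-and-pop so removing the
// winner is O(1). The queue is deliberately an unsorted vector: node
// priorities change as scheduling progresses, so keeping it sorted would
// not pay off. Q must be non-empty, as in the original.

#include <algorithm>
#include <iterator>
#include <vector>

template <typename T, typename Compare>
T popBest(std::vector<T> &Q, Compare Picker) {
  auto Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))  // *I is a better candidate than *Best
      Best = I;
  T V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}
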
    1834             : 
    1835             : //===----------------------------------------------------------------------===//
    1836             : //                RegReductionPriorityQueue Definition
    1837             : //===----------------------------------------------------------------------===//
    1838             : //
    1839             : // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
    1840             : // to reduce register pressure.
    1841             : //
    1842             : template<class SF>
    1843      310147 : class RegReductionPriorityQueue : public RegReductionPQBase {
    1844             :   SF Picker;
    1845             : 
    1846             : public:
    1847      310147 :   RegReductionPriorityQueue(MachineFunction &mf,
    1848             :                             bool tracksrp,
    1849             :                             bool srcorder,
    1850             :                             const TargetInstrInfo *tii,
    1851             :                             const TargetRegisterInfo *tri,
    1852             :                             const TargetLowering *tli)
    1853             :     : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
    1854             :                          tii, tri, tli),
    1855      310147 :       Picker(this) {}
    1856             : 
    1857           0 :   bool isBottomUp() const override { return SF::IsBottomUp; }
    1858             : 
    1859           0 :   bool isReady(SUnit *U) const override {
    1860           0 :     return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
    1861             :   }
    1862             : 
    1863     3769305 :   SUnit *pop() override {
    1864     3769305 :     if (Queue.empty()) return nullptr;
    1865             : 
    1866     3760355 :     SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    1867     3760355 :     V->NodeQueueId = 0;
    1868     3760355 :     return V;
    1869             :   }
    1870             : 
    1871             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    1872             :   LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    1873             :     // Emulate pop() without clobbering NodeQueueIds.
    1874             :     std::vector<SUnit *> DumpQueue = Queue;
    1875             :     SF DumpPicker = Picker;
    1876             :     while (!DumpQueue.empty()) {
    1877             :       SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
    1878             :       dbgs() << "Height " << SU->getHeight() << ": ";
    1879             :       SU->dump(DAG);
    1880             :     }
    1881             :   }
    1882             : #endif
    1883             : };
    1884             : 
    1885             : using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
    1886             : using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
    1887             : using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
    1888             : using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
    1889             : 
    1890             : } // end anonymous namespace
    1891             : 
    1892             : //===----------------------------------------------------------------------===//
    1893             : //           Static Node Priority for Register Pressure Reduction
    1894             : //===----------------------------------------------------------------------===//
    1895             : 
    1896             : // Check for special nodes that bypass scheduling heuristics.
    1897             : // Currently this pushes TokenFactor nodes down, but may be used for other
    1898             : // pseudo-ops as well.
    1899             : //
    1900             : // Return -1 to schedule right above left, 1 for left above right.
    1901             : // Return 0 if no bias exists.
    1902             : static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
    1903    12697463 :   bool LSchedLow = left->isScheduleLow;
    1904    12697463 :   bool RSchedLow = right->isScheduleLow;
    1905    12697463 :   if (LSchedLow != RSchedLow)
    1906       71813 :     return LSchedLow < RSchedLow ? 1 : -1;
    1907             :   return 0;
    1908             : }
    1909             : 
    1910             : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
    1911             : /// Smaller number is the higher priority.
    1912             : static unsigned
    1913     3769103 : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
    1914     7538206 :   if (SUNumbers[SU->NodeNum] != 0)
    1915             :     return SUNumbers[SU->NodeNum];
    1916             : 
    1917             :   // Use WorkList to avoid stack overflow on excessively large IRs.
    1918             :   struct WorkState {
    1919     3769104 :     WorkState(const SUnit *SU) : SU(SU) {}
    1920             :     const SUnit *SU;
    1921             :     unsigned PredsProcessed = 0;
    1922             :   };
    1923             : 
    1924             :   SmallVector<WorkState, 16> WorkList;
    1925     1734200 :   WorkList.push_back(SU);
    1926     7538206 :   while (!WorkList.empty()) {
    1927             :     auto &Temp = WorkList.back();
    1928     5804007 :     auto *TempSU = Temp.SU;
    1929             :     bool AllPredsKnown = true;
    1930             :     // Try to find a non-evaluated pred and push it into the processing stack.
    1931    16894274 :     for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
    1932             :       auto &Pred = TempSU->Preds[P];
    1933     4678034 :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1934             :       SUnit *PredSU = Pred.getSUnit();
    1935     5454254 :       if (SUNumbers[PredSU->NodeNum] == 0) {
    1936             : #ifndef NDEBUG
    1937             :         // In debug mode, check that we don't have such element in the stack.
    1938             :         for (auto It : WorkList)
    1939             :           assert(It.SU != PredSU && "Trying to push an element twice?");
    1940             : #endif
    1941             :         // Next time start processing this one starting from the next pred.
    1942     2034904 :         Temp.PredsProcessed = P + 1;
    1943     2034904 :         WorkList.push_back(PredSU);
    1944             :         AllPredsKnown = false;
    1945             :         break;
    1946             :       }
    1947             :     }
    1948             : 
    1949     2034904 :     if (!AllPredsKnown)
    1950     2034904 :       continue;
    1951             : 
    1952             :     // Once all preds are known, we can calculate the answer for this one.
    1953             :     unsigned SethiUllmanNumber = 0;
    1954             :     unsigned Extra = 0;
    1955    13125169 :     for (const SDep &Pred : TempSU->Preds) {
    1956     4678033 :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1957             :       SUnit *PredSU = Pred.getSUnit();
    1958     5454252 :       unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
    1959             :       assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
    1960     2727126 :       if (PredSethiUllman > SethiUllmanNumber) {
    1961             :         SethiUllmanNumber = PredSethiUllman;
    1962             :         Extra = 0;
    1963      780498 :       } else if (PredSethiUllman == SethiUllmanNumber)
    1964      666653 :         ++Extra;
    1965             :     }
    1966             : 
    1967     3769103 :     SethiUllmanNumber += Extra;
    1968     3769103 :     if (SethiUllmanNumber == 0)
    1969             :       SethiUllmanNumber = 1;
    1970     7538206 :     SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
    1971             :     WorkList.pop_back();
    1972             :   }
    1973             : 
    1974             :   assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
    1975     3468398 :   return SUNumbers[SU->NodeNum];
    1976             : }
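                     : // Editorial illustration (not part of ScheduleDAGRRList.cpp): a minimal,
                     : // deliberately recursive sketch of the recurrence above on a hypothetical
                     : // ToyNode type that has data preds only. The real code is iterative
                     : // precisely to avoid deep recursion on very large DAGs.
                     : #if 0
                     : struct ToyNode { std::vector<ToyNode *> Preds; };
                     : 
                     : static unsigned toySethiUllman(const ToyNode *N) {
                     :   unsigned Max = 0, Extra = 0;
                     :   for (const ToyNode *P : N->Preds) {
                     :     unsigned PNum = toySethiUllman(P); // evaluate preds first
                     :     if (PNum > Max) {
                     :       Max = PNum;
                     :       Extra = 0;
                     :     } else if (PNum == Max)
                     :       ++Extra;                         // count ties with the current max
                     :   }
                     :   unsigned Result = Max + Extra;
                     :   return Result ? Result : 1;        // a leaf gets 1; the number is never 0
                     : }
                     : // E.g. data preds numbered {3, 3, 1} give max 3 plus one tie = 4, while a
                     : // leaf (no data preds) gets 1.
                     : #endif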
    1977             : 
    1978             : /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
    1979             : /// scheduling units.
    1980      310148 : void RegReductionPQBase::CalculateSethiUllmanNumbers() {
    1981      620295 :   SethiUllmanNumbers.assign(SUnits->size(), 0);
    1982             : 
    1983     4337132 :   for (const SUnit &SU : *SUnits)
    1984     3716837 :     CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
    1985      310148 : }
    1986             : 
    1987       32538 : void RegReductionPQBase::addNode(const SUnit *SU) {
    1988       65076 :   unsigned SUSize = SethiUllmanNumbers.size();
    1989       65076 :   if (SUnits->size() > SUSize)
    1990       19622 :     SethiUllmanNumbers.resize(SUSize*2, 0);
    1991       32538 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    1992       32538 : }
    1993             : 
    1994       19727 : void RegReductionPQBase::updateNode(const SUnit *SU) {
    1995       39454 :   SethiUllmanNumbers[SU->NodeNum] = 0;
    1996       19727 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    1997       19727 : }
    1998             : 
    1999             : // Lower priority means schedule further down. For bottom-up scheduling, lower
    2000             : // priority SUs are scheduled before higher priority SUs.
    2001    14650766 : unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
    2002             :   assert(SU->NodeNum < SethiUllmanNumbers.size());
    2003    14650766 :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2004    14650766 :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2005             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2006             :     // avoid spilling.
    2007             :     return 0;
    2008    28501738 :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2009    28501738 :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2010             :       Opc == TargetOpcode::INSERT_SUBREG)
    2011             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2012             :     // close to their uses to facilitate coalescing.
    2013             :     return 0;
    2014    14250869 :   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    2015             :     // If SU does not have a register use, i.e. it doesn't produce a value
    2016             :     // that would be consumed (e.g. store), then it terminates a chain of
    2017             :     // computation.  Give it a large SethiUllman number so that it is
    2018             :     // scheduled right before its predecessors and doesn't lengthen their
    2019             :     // live ranges.
    2020             :     return 0xffff;
    2021    11346131 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2022             :     // If SU does not have a register def, schedule it close to its uses
    2023             :     // because it does not lengthen any live ranges.
    2024             :     return 0;
    2025             : #if 1
    2026    18672638 :   return SethiUllmanNumbers[SU->NodeNum];
    2027             : #else
    2028             :   unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
    2029             :   if (SU->isCallOp) {
    2030             :     // FIXME: This assumes all of the defs are used as call operands.
    2031             :     int NP = (int)Priority - SU->getNode()->getNumValues();
    2032             :     return (NP > 0) ? NP : 0;
    2033             :   }
    2034             :   return Priority;
    2035             : #endif
    2036             : }
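                     : // Editorial note, summarizing the mapping above with hypothetical nodes: a
                     : // CopyToReg/TokenFactor/subreg pseudo returns 0 (keep it next to its uses),
                     : // a store-like node (preds but no succs) returns 0xffff (it terminates a
                     : // chain of computation), a node with succs but no preds returns 0, and an
                     : // ordinary ADD with SethiUllmanNumbers[NodeNum] == 3 simply returns 3.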
    2037             : 
    2038             : //===----------------------------------------------------------------------===//
    2039             : //                     Register Pressure Tracking
    2040             : //===----------------------------------------------------------------------===//
    2041             : 
    2042             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2043             : LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
    2044             :   for (const TargetRegisterClass *RC : TRI->regclasses()) {
    2045             :     unsigned Id = RC->getID();
    2046             :     unsigned RP = RegPressure[Id];
    2047             :     if (!RP) continue;
    2048             :     DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
    2049             :           << RegLimit[Id] << '\n');
    2050             :   }
    2051             : }
    2052             : #endif
    2053             : 
    2054      328784 : bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
    2055      328784 :   if (!TLI)
    2056             :     return false;
    2057             : 
    2058     1039242 :   for (const SDep &Pred : SU->Preds) {
    2059      375296 :     if (Pred.isCtrl())
    2060       23020 :       continue;
    2061             :     SUnit *PredSU = Pred.getSUnit();
    2062             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2063             :     // to cover the number of registers defined (they are all live).
    2064      352276 :     if (PredSU->NumRegDefsLeft == 0) {
    2065      145318 :       continue;
    2066             :     }
    2067      419701 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2068      632444 :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2069             :       unsigned RCId, Cost;
    2070      232810 :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2071             : 
    2072      698430 :       if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
    2073       20067 :         return true;
    2074             :     }
    2075             :   }
    2076             :   return false;
    2077             : }
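                     : // Editorial example with hypothetical numbers: if RegPressure[RCId] == 7,
                     : // RegLimit[RCId] == 8, and a pred still has a live def of Cost 1, then
                     : // 7 + 1 >= 8 holds and HighRegPressure reports true for this SU.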
    2078             : 
    2079             : bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
    2080             :   const SDNode *N = SU->getNode();
    2081             : 
    2082             :   if (!N->isMachineOpcode() || !SU->NumSuccs)
    2083             :     return false;
    2084             : 
    2085             :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2086             :   for (unsigned i = 0; i != NumDefs; ++i) {
    2087             :     MVT VT = N->getSimpleValueType(i);
    2088             :     if (!N->hasAnyUseOfValue(i))
    2089             :       continue;
    2090             :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2091             :     if (RegPressure[RCId] >= RegLimit[RCId])
    2092             :       return true;
    2093             :   }
    2094             :   return false;
    2095             : }
    2096             : 
    2097             : // Compute the register pressure contribution of this instruction by counting
    2098             : // up for uses that are not live and down for defs. Only count register classes
    2099             : // that are already under high pressure. As a side effect, compute the number of
    2100             : // uses of registers that are already live.
    2101             : //
    2102             : // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
    2103             : // so could probably be factored.
    2104      288742 : int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
    2105      288742 :   LiveUses = 0;
    2106             :   int PDiff = 0;
    2107      900552 :   for (const SDep &Pred : SU->Preds) {
    2108      305905 :     if (Pred.isCtrl())
    2109       25786 :       continue;
    2110             :     SUnit *PredSU = Pred.getSUnit();
    2111             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2112             :     // to cover the number of registers defined (they are all live).
    2113      347638 :     if (PredSU->NumRegDefsLeft == 0) {
    2114       67519 :       if (PredSU->getNode()->isMachineOpcode())
    2115       55793 :         ++LiveUses;
    2116       67519 :       continue;
    2117             :     }
    2118      425697 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2119      638794 :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2120      213097 :       MVT VT = RegDefPos.GetValue();
    2121      213097 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2122      639291 :       if (RegPressure[RCId] >= RegLimit[RCId])
    2123       81806 :         ++PDiff;
    2124             :     }
    2125             :   }
    2126      288742 :   const SDNode *N = SU->getNode();
    2127             : 
    2128      288742 :   if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    2129             :     return PDiff;
    2130             : 
    2131      378615 :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2132      377961 :   for (unsigned i = 0; i != NumDefs; ++i) {
    2133      125878 :     MVT VT = N->getSimpleValueType(i);
    2134      125878 :     if (!N->hasAnyUseOfValue(i))
    2135           6 :       continue;
    2136      125872 :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2137      377616 :     if (RegPressure[RCId] >= RegLimit[RCId])
    2138       25125 :       --PDiff;
    2139             :   }
    2140             :   return PDiff;
    2141             : }
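                     : // Editorial example with hypothetical numbers: an SU with two data preds
                     : // that each still have an unscheduled reg def in a class at its limit gets
                     : // PDiff += 2; if the SU itself defines one used value in such a class,
                     : // PDiff -= 1, for a net PDiff of +1. Preds whose defs are already fully
                     : // covered (NumRegDefsLeft == 0) only bump LiveUses.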
    2142             : 
    2143     3751493 : void RegReductionPQBase::scheduledNode(SUnit *SU) {
    2144     3751493 :   if (!TracksRegPressure)
    2145             :     return;
    2146             : 
    2147      216646 :   if (!SU->getNode())
    2148             :     return;
    2149             : 
    2150      690934 :   for (const SDep &Pred : SU->Preds) {
    2151      237145 :     if (Pred.isCtrl())
    2152       69669 :       continue;
    2153             :     SUnit *PredSU = Pred.getSUnit();
    2154             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2155             :     // to cover the number of registers defined (they are all live).
    2156      167476 :     if (PredSU->NumRegDefsLeft == 0) {
    2157       25070 :       continue;
    2158             :     }
    2159             :     // FIXME: The ScheduleDAG currently loses information about which of a
    2160             :     // node's values is consumed by each dependence. Consequently, if the node
    2161             :     // defines multiple register classes, we don't know which to pressurize
    2162             :     // here. Instead the following loop consumes the register defs in an
    2163             :     // arbitrary order. At least it handles the common case of clustered loads
    2164             :     // to the same class. For precise liveness, each SDep needs to indicate the
    2165             :     // result number. But that tightly couples the ScheduleDAG with the
    2166             :     // SelectionDAG making updates tricky. A simpler hack would be to attach a
    2167             :     // value type or register class to SDep.
    2168             :     //
    2169             :     // The most important aspect of register tracking is balancing the increase
    2170             :     // here with the reduction further below. Note that this SU may use multiple
    2171             :     // defs in PredSU. They can't be determined here, but we've already
    2172             :     // compensated by reducing NumRegDefsLeft in PredSU during
    2173             :     // ScheduleDAGSDNodes::AddSchedEdges.
    2174      142406 :     --PredSU->NumRegDefsLeft;
    2175      142406 :     unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    2176      143003 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2177      143600 :          RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2178      143003 :       if (SkipRegDefs)
    2179         597 :         continue;
    2180             : 
    2181             :       unsigned RCId, Cost;
    2182      142406 :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2183      284812 :       RegPressure[RCId] += Cost;
    2184      142406 :       break;
    2185             :     }
    2186             :   }
    2187             : 
    2188             :   // We should have this assert, but there may be dead SDNodes that never
    2189             :   // materialize as SUnits, so they don't appear to generate liveness.
    2190             :   //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
    2191      216644 :   int SkipRegDefs = (int)SU->NumRegDefsLeft;
    2192      362750 :   for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
    2193      508856 :        RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2194      146106 :     if (SkipRegDefs > 0)
    2195          11 :       continue;
    2196             :     unsigned RCId, Cost;
    2197      146095 :     GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2198      292190 :     if (RegPressure[RCId] < Cost) {
    2199             :       // Register pressure tracking is imprecise. This can happen. But we try
    2200             :       // hard not to let it happen because it likely results in poor scheduling.
    2201             :       DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
    2202        3700 :       RegPressure[RCId] = 0;
    2203             :     }
    2204             :     else {
    2205      142395 :       RegPressure[RCId] -= Cost;
    2206             :     }
    2207             :   }
    2208             :   DEBUG(dumpRegPressure());
    2209             : }
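                     : // Editorial example with hypothetical numbers: if RegPressure[RCId] == 3
                     : // and scheduling SU makes one pred def of Cost 1 live while ending two of
                     : // SU's own defs of Cost 1 each, the class ends up at 3 + 1 - 2 = 2; the
                     : // clamp to 0 above only fires when tracking has drifted.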
    2210             : 
    2211        8472 : void RegReductionPQBase::unscheduledNode(SUnit *SU) {
    2212        8472 :   if (!TracksRegPressure)
    2213             :     return;
    2214             : 
    2215          92 :   const SDNode *N = SU->getNode();
    2216          92 :   if (!N) return;
    2217             : 
    2218          92 :   if (!N->isMachineOpcode()) {
    2219          15 :     if (N->getOpcode() != ISD::CopyToReg)
    2220             :       return;
    2221             :   } else {
    2222             :     unsigned Opc = N->getMachineOpcode();
    2223         154 :     if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2224          77 :         Opc == TargetOpcode::INSERT_SUBREG ||
    2225          77 :         Opc == TargetOpcode::SUBREG_TO_REG ||
    2226         134 :         Opc == TargetOpcode::REG_SEQUENCE ||
    2227          67 :         Opc == TargetOpcode::IMPLICIT_DEF)
    2228             :       return;
    2229             :   }
    2230             : 
    2231         387 :   for (const SDep &Pred : SU->Preds) {
    2232         160 :     if (Pred.isCtrl())
    2233          17 :       continue;
    2234             :     SUnit *PredSU = Pred.getSUnit();
    2235             :     // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    2236             :     // counts data deps.
    2237         286 :     if (PredSU->NumSuccsLeft != PredSU->Succs.size())
    2238          68 :       continue;
    2239          75 :     const SDNode *PN = PredSU->getNode();
    2240          75 :     if (!PN->isMachineOpcode()) {
    2241          11 :       if (PN->getOpcode() == ISD::CopyFromReg) {
    2242          11 :         MVT VT = PN->getSimpleValueType(0);
    2243          11 :         unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2244          22 :         RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2245             :       }
    2246          11 :       continue;
    2247             :     }
    2248             :     unsigned POpc = PN->getMachineOpcode();
    2249          64 :     if (POpc == TargetOpcode::IMPLICIT_DEF)
    2250           0 :       continue;
    2251         128 :     if (POpc == TargetOpcode::EXTRACT_SUBREG ||
    2252          64 :         POpc == TargetOpcode::INSERT_SUBREG ||
    2253          64 :         POpc == TargetOpcode::SUBREG_TO_REG) {
    2254           2 :       MVT VT = PN->getSimpleValueType(0);
    2255           2 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2256           4 :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2257           2 :       continue;
    2258             :     }
    2259         124 :     unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    2260         160 :     for (unsigned i = 0; i != NumDefs; ++i) {
    2261          49 :       MVT VT = PN->getSimpleValueType(i);
    2262          49 :       if (!PN->hasAnyUseOfValue(i))
    2263           3 :         continue;
    2264          46 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2265          92 :       if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
    2266             :         // Register pressure tracking is imprecise. This can happen.
    2267          38 :         RegPressure[RCId] = 0;
    2268             :       else
    2269          54 :         RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    2270             :     }
    2271             :   }
    2272             : 
    2273             :   // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    2274             :   // may transfer data dependencies to CopyToReg.
    2275          67 :   if (SU->NumSuccs && N->isMachineOpcode()) {
    2276         141 :     unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2277         116 :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2278             :       MVT VT = N->getSimpleValueType(i);
    2279          11 :       if (VT == MVT::Glue || VT == MVT::Other)
    2280           0 :         continue;
    2281          19 :       if (!N->hasAnyUseOfValue(i))
    2282           8 :         continue;
    2283           3 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2284           6 :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2285             :     }
    2286             :   }
    2287             : 
    2288             :   DEBUG(dumpRegPressure());
    2289             : }
    2290             : 
    2291             : //===----------------------------------------------------------------------===//
    2292             : //           Dynamic Node Priority for Register Pressure Reduction
    2293             : //===----------------------------------------------------------------------===//
    2294             : 
    2295             : /// closestSucc - Returns the scheduled cycle of the successor which is
    2296             : /// closest to the current cycle.
    2297    13471984 : static unsigned closestSucc(const SUnit *SU) {
    2298             :   unsigned MaxHeight = 0;
    2299    46893258 :   for (const SDep &Succ : SU->Succs) {
    2300    16710637 :     if (Succ.isCtrl()) continue;  // ignore chain succs
    2301             :     unsigned Height = Succ.getSUnit()->getHeight();
    2302             :     // If there are a bunch of CopyToRegs stacked up, they should be considered
    2303             :     // to be at the same position.
    2304    25298198 :     if (Succ.getSUnit()->getNode() &&
    2305    12649099 :         Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
    2306      244356 :       Height = closestSucc(Succ.getSUnit())+1;
    2307    12649099 :     if (Height > MaxHeight)
    2308             :       MaxHeight = Height;
    2309             :   }
    2310    13471984 :   return MaxHeight;
    2311             : }
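                     : // Editorial example: for an SU with data succs at heights 2 and 5, the
                     : // result is 5. If the height-5 succ were a CopyToReg whose own closestSucc
                     : // is 6, it would be counted at 6 + 1 = 7, so a stack of CopyToRegs
                     : // collapses to a single position.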
    2312             : 
    2313             : /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
    2314             : /// for scratch registers, i.e. the number of data dependencies.
    2315             : static unsigned calcMaxScratches(const SUnit *SU) {
    2316             :   unsigned Scratches = 0;
    2317    19739288 :   for (const SDep &Pred : SU->Preds) {
    2318     7674581 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2319     5680264 :     Scratches++;
    2320             :   }
    2321             :   return Scratches;
    2322             : }
    2323             : 
    2324             : /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
    2325             : /// CopyFromReg from a virtual register.
    2326       52932 : static bool hasOnlyLiveInOpers(const SUnit *SU) {
    2327             :   bool RetVal = false;
    2328      118154 :   for (const SDep &Pred : SU->Preds) {
    2329       58092 :     if (Pred.isCtrl()) continue;
    2330             :     const SUnit *PredSU = Pred.getSUnit();
    2331       74954 :     if (PredSU->getNode() &&
    2332       37477 :         PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
    2333             :       unsigned Reg =
    2334       26970 :         cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
    2335       13485 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2336             :         RetVal = true;
    2337       11996 :         continue;
    2338             :       }
    2339             :     }
    2340             :     return false;
    2341             :   }
    2342             :   return RetVal;
    2343             : }
    2344             : 
    2345             : /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
    2346             : /// CopyToReg to a virtual register. This SU def is probably a liveout and
    2347             : /// it has no other use. It should be scheduled closer to the terminator.
    2348        6438 : static bool hasOnlyLiveOutUses(const SUnit *SU) {
    2349             :   bool RetVal = false;
    2350       12060 :   for (const SDep &Succ : SU->Succs) {
    2351        7601 :     if (Succ.isCtrl()) continue;
    2352             :     const SUnit *SuccSU = Succ.getSUnit();
    2353        6031 :     if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
    2354             :       unsigned Reg =
    2355        2490 :         cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
    2356        1245 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2357             :         RetVal = true;
    2358        1241 :         continue;
    2359             :       }
    2360             :     }
    2361             :     return false;
    2362             :   }
    2363             :   return RetVal;
    2364             : }
    2365             : 
    2366             : // Set isVRegCycle for a node with only live in opers and live out uses. Also
    2367             : // set isVRegCycle for its CopyFromReg operands.
    2368             : //
    2369             : // This is only relevant for single-block loops, in which case the VRegCycle
    2370             : // node is likely an induction variable whose operand and target virtual
    2371             : // registers should be coalesced (e.g. pre/post increment values). Setting the
    2372             : // isVRegCycle flag helps the scheduler prioritize other uses of the same
    2373             : // CopyFromReg so that this node becomes the virtual register "kill". This
    2374             : // avoids interference between the values live in and out of the block and
    2375             : // eliminates a copy inside the loop.
    2376       52932 : static void initVRegCycle(SUnit *SU) {
    2377       52932 :   if (DisableSchedVRegCycle)
    2378             :     return;
    2379             : 
    2380       52932 :   if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    2381             :     return;
    2382             : 
    2383             :   DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
    2384             : 
    2385         894 :   SU->isVRegCycle = true;
    2386             : 
    2387        3038 :   for (const SDep &Pred : SU->Preds) {
    2388        1072 :     if (Pred.isCtrl()) continue;
    2389        1057 :     Pred.getSUnit()->isVRegCycle = true;
    2390             :   }
    2391             : }
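                     : // Editorial example: in a single-block loop computing i.next = i + 1, the
                     : // increment's only value operand is CopyFromReg(i) and its only value use
                     : // is CopyToReg(i.next), both on virtual registers, so the increment and
                     : // its CopyFromReg operand get isVRegCycle set.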
    2392             : 
    2393             : // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
    2394             : // CopyFromReg operands. We should no longer penalize other uses of this VReg.
    2395     3751493 : static void resetVRegCycle(SUnit *SU) {
    2396     3751493 :   if (!SU->isVRegCycle)
    2397             :     return;
    2398             : 
    2399        3038 :   for (const SDep &Pred : SU->Preds) {
    2400        1072 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2401             :     SUnit *PredSU = Pred.getSUnit();
    2402        1057 :     if (PredSU->isVRegCycle) {
    2403             :       assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
    2404             :              "VRegCycle def must be CopyFromReg");
    2405        1022 :       Pred.getSUnit()->isVRegCycle = false;
    2406             :     }
    2407             :   }
    2408             : }
    2409             : 
    2410             : // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
    2411             : // means a node that defines the VRegCycle has not been scheduled yet.
    2412     4444376 : static bool hasVRegCycleUse(const SUnit *SU) {
    2413             :   // If this SU also defines the VReg, don't hoist it as a "use".
    2414     4444376 :   if (SU->isVRegCycle)
    2415             :     return false;
    2416             : 
    2417    19063177 :   for (const SDep &Pred : SU->Preds) {
    2418     7309832 :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2419     5512085 :     if (Pred.getSUnit()->isVRegCycle &&
    2420        1122 :         Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
    2421             :       DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
    2422             :       return true;
    2423             :     }
    2424             :   }
    2425             :   return false;
    2426             : }
    2427             : 
    2428             : // Check for either a dependence (latency) or resource (hazard) stall.
    2429             : //
    2430             : // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
    2431             : static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
    2432     4242385 :   if ((int)SPQ->getCurCycle() < Height) return true;
    2433     8400420 :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2434             :       != ScheduleHazardRecognizer::NoHazard)
    2435             :     return true;
    2436             :   return false;
    2437             : }
    2438             : 
    2439             : // Return -1 if left has higher priority, 1 if right has higher priority.
    2440             : // Return 0 if latency-based priority is equivalent.
    2441     2222188 : static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
    2442             :                             RegReductionPQBase *SPQ) {
    2443             :   // Scheduling an instruction that uses a VReg whose postincrement has not yet
    2444             :   // been scheduled will induce a copy. Model this as an extra cycle of latency.
    2445     2222188 :   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
    2446     2222188 :   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
    2447     2222188 :   int LHeight = (int)left->getHeight() + LPenalty;
    2448     2222188 :   int RHeight = (int)right->getHeight() + RPenalty;
    2449             : 
    2450     2222188 :   bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    2451             :     BUHasStall(left, LHeight, SPQ);
    2452     2222188 :   bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    2453             :     BUHasStall(right, RHeight, SPQ);
    2454             : 
    2455             :   // If scheduling one of the nodes will cause a pipeline stall, delay it.
    2456             :   // If scheduling both of them would cause pipeline stalls, sort them
    2457             :   // according to their height.
    2458     2222188 :   if (LStall) {
    2459       43171 :     if (!RStall)
    2460             :       return 1;
    2461       36929 :     if (LHeight != RHeight)
    2462        5963 :       return LHeight > RHeight ? 1 : -1;
    2463     2179017 :   } else if (RStall)
    2464             :     return -1;
    2465             : 
    2466             :   // If either node is scheduling for latency, sort them by height/depth
    2467             :   // and latency.
    2468     2297945 :   if (!checkPref || (left->SchedulingPref == Sched::ILP ||
    2469       96099 :                      right->SchedulingPref == Sched::ILP)) {
    2470             :     // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
    2471             :     // is enabled, grouping instructions by cycle, then its height is already
    2472             :     // covered so only its depth matters. We also reach this point if both stall
    2473             :     // but have the same height.
    2474     4214150 :     if (!SPQ->getHazardRec()->isEnabled()) {
    2475     2003957 :       if (LHeight != RHeight)
    2476      131673 :         return LHeight > RHeight ? 1 : -1;
    2477             :     }
    2478     1975402 :     int LDepth = left->getDepth() - LPenalty;
    2479     1975402 :     int RDepth = right->getDepth() - RPenalty;
    2480     1975402 :     if (LDepth != RDepth) {
    2481             :       DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
    2482             :             << ") depth " << LDepth << " vs SU (" << right->NodeNum
    2483             :             << ") depth " << RDepth << "\n");
    2484      228104 :       return LDepth < RDepth ? 1 : -1;
    2485             :     }
    2486     1747298 :     if (left->Latency != right->Latency)
    2487        2253 :       return left->Latency > right->Latency ? 1 : -1;
    2488             :   }
    2489             :   return 0;
    2490             : }
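                     : // Editorial example: if both nodes stall and LHeight = 5 vs RHeight = 3,
                     : // the function returns 1, i.e. right has higher priority. With equal
                     : // heights it falls through to comparing depth and then raw latency
                     : // before returning 0.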
    2491             : 
    2492     7332698 : static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
    2493             :   // Schedule physical register definitions close to their use. This is
    2494             :   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
    2495             :   // long as shortening physreg live ranges is generally good, we can defer
    2496             :   // creating a subtarget hook.
    2497     7332698 :   if (!DisableSchedPhysRegJoin) {
    2498     7332698 :     bool LHasPhysReg = left->hasPhysRegDefs;
    2499     7332698 :     bool RHasPhysReg = right->hasPhysRegDefs;
    2500     7332698 :     if (LHasPhysReg != RHasPhysReg) {
    2501             :       #ifndef NDEBUG
    2502             :       static const char *const PhysRegMsg[] = { " has no physreg",
    2503             :                                                 " defines a physreg" };
    2504             :       #endif
    2505             :       DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
    2506             :             << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
    2507             :             << PhysRegMsg[RHasPhysReg] << "\n");
    2508        7315 :       return LHasPhysReg < RHasPhysReg;
    2509             :     }
    2510             :   }
    2511             : 
    2512             :   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
    2513     7325383 :   unsigned LPriority = SPQ->getNodePriority(left);
    2514     7325383 :   unsigned RPriority = SPQ->getNodePriority(right);
    2515             : 
    2516             :   // Be really careful about hoisting call operands above previous calls.
    2517             :   // Only allow it if it would reduce register pressure.
    2518     7325383 :   if (left->isCall && right->isCallOp) {
    2519         761 :     unsigned RNumVals = right->getNode()->getNumValues();
    2520         761 :     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
    2521             :   }
    2522     7325383 :   if (right->isCall && left->isCallOp) {
    2523         785 :     unsigned LNumVals = left->getNode()->getNumValues();
    2524         785 :     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
    2525             :   }
    2526             : 
    2527     7325383 :   if (LPriority != RPriority)
    2528      709995 :     return LPriority > RPriority;
    2529             : 
    2530             :   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
    2531             :   // the same, keep source order.
    2532     6615388 :   if (left->isCall || right->isCall) {
    2533             :     unsigned LOrder = SPQ->getNodeOrdering(left);
    2534             :     unsigned ROrder = SPQ->getNodeOrdering(right);
    2535             : 
    2536             :     // Prefer an ordering where the lower the non-zero order number, the higher
    2537             :     // the preference.
    2538        5464 :     if ((LOrder || ROrder) && LOrder != ROrder)
    2539        1574 :       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2540             :   }
    2541             : 
    2542             :   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
    2543             :   // e.g.
    2544             :   // t1 = op t2, c1
    2545             :   // t3 = op t4, c2
    2546             :   //
    2547             :   // and the following instructions are both ready.
    2548             :   // t2 = op c3
    2549             :   // t4 = op c4
    2550             :   //
    2551             :   // Then schedule t2 = op first.
    2552             :   // i.e.
    2553             :   // t4 = op c4
    2554             :   // t2 = op c3
    2555             :   // t1 = op t2, c1
    2556             :   // t3 = op t4, c2
    2557             :   //
    2558             :   // This creates more short live intervals.
    2559     6613814 :   unsigned LDist = closestSucc(left);
    2560     6613814 :   unsigned RDist = closestSucc(right);
    2561     6613814 :   if (LDist != RDist)
    2562     4418751 :     return LDist < RDist;
    2563             : 
    2564             :   // How many registers becomes live when the node is scheduled.
    2565             :   unsigned LScratch = calcMaxScratches(left);
    2566             :   unsigned RScratch = calcMaxScratches(right);
    2567     2195063 :   if (LScratch != RScratch)
    2568      121323 :     return LScratch > RScratch;
    2569             : 
    2570             :   // Comparing latency against a call makes little sense unless the node
    2571             :   // is register pressure-neutral.
    2572     2073740 :   if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    2573         344 :     return (left->NodeQueueId > right->NodeQueueId);
    2574             : 
    2575             :   // Do not compare latencies when one or both of the nodes are calls.
    2576     2073396 :   if (!DisableSchedCycles &&
    2577     2073389 :       !(left->isCall || right->isCall)) {
    2578     2073369 :     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    2579     2073369 :     if (result != 0)
    2580      340896 :       return result > 0;
    2581             :   }
    2582             :   else {
    2583          27 :     if (left->getHeight() != right->getHeight())
    2584           0 :       return left->getHeight() > right->getHeight();
    2585             : 
    2586          27 :     if (left->getDepth() != right->getDepth())
    2587           8 :       return left->getDepth() < right->getDepth();
    2588             :   }
    2589             : 
    2590             :   assert(left->NodeQueueId && right->NodeQueueId &&
    2591             :          "NodeQueueId cannot be zero");
    2592     1732492 :   return (left->NodeQueueId > right->NodeQueueId);
    2593             : }
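                     : // Editorial summary of BURRSort's tie-break order: physreg defs first,
                     : // then (call-adjusted) Sethi-Ullman priority, then source order when a
                     : // call is involved, then distance to the closest successor, then the
                     : // scratch-register estimate, then latency (or height/depth when cycle
                     : // comparison is disabled), with NodeQueueId as the stable fallback.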
    2594             : 
    2595             : // Bottom up
    2596             : bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2597             :   if (int res = checkSpecialNodes(left, right))
    2598         216 :     return res > 0;
    2599             : 
    2600       21767 :   return BURRSort(left, right, SPQ);
    2601             : }
    2602             : 
    2603             : // Source order, otherwise bottom up.
    2604    12356413 : bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2605             :   if (int res = checkSpecialNodes(left, right))
    2606       68444 :     return res > 0;
    2607             : 
    2608    12287969 :   unsigned LOrder = SPQ->getNodeOrdering(left);
    2609             :   unsigned ROrder = SPQ->getNodeOrdering(right);
    2610             : 
    2611             :   // Prefer an ordering where the lower the non-zero order number, the higher
    2612             :   // the preference.
    2613    12287969 :   if ((LOrder || ROrder) && LOrder != ROrder)
    2614     5212132 :     return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2615             : 
    2616     7075837 :   return BURRSort(left, right, SPQ);
    2617             : }
    2618             : 
    2619             : // If the time between now and when the instruction will be ready can cover
    2620             : // the spill code, then avoid adding it to the ready queue. This gives long
    2621             : // stalls highest priority and allows hoisting across calls. It should also
    2622             : // speed up processing the available queue.
    2623             : bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2624             :   static const unsigned ReadyDelay = 3;
    2625             : 
    2626             :   if (SPQ->MayReduceRegPressure(SU)) return true;
    2627             : 
    2628             :   if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
    2629             : 
    2630             :   if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
    2631             :       != ScheduleHazardRecognizer::NoHazard)
    2632             :     return false;
    2633             : 
    2634             :   return true;
    2635             : }
    2636             : 
    2637             : // Return true if right should be scheduled with higher priority than left.
    2638      168991 : bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2639             :   if (int res = checkSpecialNodes(left, right))
    2640        1287 :     return res > 0;
    2641             : 
    2642      333377 :   if (left->isCall || right->isCall)
    2643             :     // No way to compute latency of calls.
    2644        3312 :     return BURRSort(left, right, SPQ);
    2645             : 
    2646      164392 :   bool LHigh = SPQ->HighRegPressure(left);
    2647      164392 :   bool RHigh = SPQ->HighRegPressure(right);
    2648             :   // Avoid causing spills. If register pressure is high, schedule for
    2649             :   // register pressure reduction.
    2650      164392 :   if (LHigh && !RHigh) {
    2651             :     DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
    2652             :           << right->NodeNum << ")\n");
    2653             :     return true;
    2654             :   }
    2655      163500 :   else if (!LHigh && RHigh) {
    2656             :     DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
    2657             :           << left->NodeNum << ")\n");
    2658             :     return false;
    2659             :   }
    2660      153313 :   if (!LHigh && !RHigh) {
    2661      148819 :     int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    2662      148819 :     if (result != 0)
    2663       41476 :       return result > 0;
    2664             :   }
    2665      111837 :   return BURRSort(left, right, SPQ);
    2666             : }
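                     : // Editorial summary of the hybrid comparison above: calls go straight to
                     : // BURRSort, a one-sided HighRegPressure verdict decides immediately (the
                     : // low-pressure node is preferred), two low-pressure nodes are compared by
                     : // latency first, and everything else falls back to BURRSort.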
    2667             : 
    2668             : // Schedule as many instructions in each cycle as possible. So don't make an
    2669             : // instruction available unless it is ready in the current cycle.
    2670             : bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2671             :   if (SU->getHeight() > CurCycle) return false;
    2672             : 
    2673             :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2674             :       != ScheduleHazardRecognizer::NoHazard)
    2675             :     return false;
    2676             : 
    2677             :   return true;
    2678             : }
    2679             : 
    2680       33948 : static bool canEnableCoalescing(SUnit *SU) {
    2681       33948 :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2682       33948 :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2683             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2684             :     // avoid spilling.
    2685             :     return true;
    2686             : 
    2687       51822 :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2688       51822 :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2689             :       Opc == TargetOpcode::INSERT_SUBREG)
    2690             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2691             :     // close to their uses to facilitate coalescing.
    2692             :     return true;
    2693             : 
    2694       25911 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2695             :     // If SU does not have a register def, schedule it close to its uses
    2696             :     // because it does not lengthen any live ranges.
    2697             :     return true;
    2698             : 
    2699       25911 :   return false;
    2700             : }
    2701             : 
    2702             : // list-ilp is currently an experimental scheduler that allows various
    2703             : // heuristics to be enabled prior to the normal register reduction logic.
    2704      150076 : bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2705             :   if (int res = checkSpecialNodes(left, right))
    2706        1866 :     return res > 0;
    2707             : 
    2708      294304 :   if (left->isCall || right->isCall)
    2709             :     // No way to compute latency of calls.
    2710        3839 :     return BURRSort(left, right, SPQ);
    2711             : 
    2712      144371 :   unsigned LLiveUses = 0, RLiveUses = 0;
    2713             :   int LPDiff = 0, RPDiff = 0;
    2714      144371 :   if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    2715      144371 :     LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    2716      144371 :     RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
    2717             :   }
    2718      144371 :   if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    2719             :     DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
    2720             :           << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
    2721       16977 :     return LPDiff > RPDiff;
    2722             :   }
    2723             : 
    2724      254788 :   if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    2725       16974 :     bool LReduce = canEnableCoalescing(left);
    2726       16974 :     bool RReduce = canEnableCoalescing(right);
    2727       16974 :     if (LReduce && !RReduce) return false;
    2728       16561 :     if (RReduce && !LReduce) return true;
    2729             :   }
    2730             : 
    2731      126923 :   if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    2732             :     DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
    2733             :           << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
    2734           0 :     return LLiveUses < RLiveUses;
    2735             :   }
    2736             : 
    2737      126923 :   if (!DisableSchedStalls) {
    2738           0 :     bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    2739           0 :     bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    2740           0 :     if (LStall != RStall)
    2741           0 :       return left->getHeight() > right->getHeight();
    2742             :   }
    2743             : 
    2744      126923 :   if (!DisableSchedCriticalPath) {
    2745      253846 :     int spread = (int)left->getDepth() - (int)right->getDepth();
    2746      253846 :     if (std::abs(spread) > MaxReorderWindow) {
    2747             :       DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
    2748             :             << left->getDepth() << " != SU(" << right->NodeNum << "): "
    2749             :             << right->getDepth() << "\n");
    2750        2439 :       return left->getDepth() < right->getDepth();
    2751             :     }
    2752             :   }
    2753             : 
    2754      248968 :   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    2755       92864 :     int spread = (int)left->getHeight() - (int)right->getHeight();
    2756       92864 :     if (std::abs(spread) > MaxReorderWindow)
    2757        8378 :       return left->getHeight() > right->getHeight();
    2758             :   }
    2759             : 
    2760      116106 :   return BURRSort(left, right, SPQ);
    2761             : }
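                     : // Editorial summary of the list-ilp cascade above: calls go straight to
                     : // BURRSort; otherwise RegPressureDiff, the coalescing hint, live-use
                     : // counts, stalls, critical path (depth), and height are consulted in
                     : // turn, each gated by its Disable* flag (and by MaxReorderWindow for
                     : // depth and height), before falling back to BURRSort.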
    2762             : 
    2763      310148 : void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
    2764      310148 :   SUnits = &sunits;
    2765             :   // Add pseudo dependency edges for two-address nodes.
    2766      310148 :   if (!Disable2AddrHack)
    2767           0 :     AddPseudoTwoAddrDeps();
    2768             :   // Reroute edges to nodes with multiple uses.
    2769      310148 :   if (!TracksRegPressure && !SrcOrder)
    2770        5251 :     PrescheduleNodesWithMultipleUses();
    2771             :   // Calculate node priorities.
    2772      310148 :   CalculateSethiUllmanNumbers();
    2773             : 
    2774             :   // For single block loops, mark nodes that look like canonical IV increments.
    2775      310148 :   if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
    2776       56357 :     for (SUnit &SU : sunits)
    2777       52932 :       initVRegCycle(&SU);
    2778      310148 : }
    2779             : 
    2780             : //===----------------------------------------------------------------------===//
    2781             : //                    Preschedule for Register Pressure
    2782             : //===----------------------------------------------------------------------===//
    2783             : 
    2784           0 : bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
    2785           0 :   if (SU->isTwoAddress) {
    2786           0 :     unsigned Opc = SU->getNode()->getMachineOpcode();
    2787           0 :     const MCInstrDesc &MCID = TII->get(Opc);
    2788           0 :     unsigned NumRes = MCID.getNumDefs();
    2789           0 :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    2790           0 :     for (unsigned i = 0; i != NumOps; ++i) {
    2791           0 :       if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
    2792           0 :         SDNode *DU = SU->getNode()->getOperand(i).getNode();
    2793           0 :         if (DU->getNodeId() != -1 &&
    2794           0 :             Op->OrigNode == &(*SUnits)[DU->getNodeId()])
    2795             :           return true;
    2796             :       }
    2797             :     }
    2798             :   }
    2799             :   return false;
    2800             : }
    2801             : 
    2802             : /// canClobberReachingPhysRegUse - True if SU would clobber one of its
    2803             : /// successors' explicit physregs whose definition can reach DepSU.
    2804             : /// i.e. DepSU should not be scheduled above SU.
    2805           0 : static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
    2806             :                                          ScheduleDAGRRList *scheduleDAG,
    2807             :                                          const TargetInstrInfo *TII,
    2808             :                                          const TargetRegisterInfo *TRI) {
    2809             :   const MCPhysReg *ImpDefs
    2810           0 :     = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
    2811             :   const uint32_t *RegMask = getNodeRegMask(SU->getNode());
    2812           0 :   if (!ImpDefs && !RegMask)
    2813             :     return false;
    2814             : 
    2815           0 :   for (const SDep &Succ : SU->Succs) {
    2816             :     SUnit *SuccSU = Succ.getSUnit();
    2817           0 :     for (const SDep &SuccPred : SuccSU->Preds) {
    2818           0 :       if (!SuccPred.isAssignedRegDep())
    2819           0 :         continue;
    2820             : 
    2821           0 :       if (RegMask &&
    2822           0 :           MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
    2823             :           scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2824             :         return true;
    2825             : 
    2826           0 :       if (ImpDefs)
    2827           0 :         for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
    2828             :           // Return true if SU clobbers this physical register use and the
    2829             :           // definition of the register reaches from DepSU. IsReachable queries
    2830             :           // a topological forward sort of the DAG (following the successors).
    2831           0 :           if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
    2832             :               scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2833             :             return true;
    2834             :     }
    2835             :   }
    2836             :   return false;
    2837             : }
    2838             : 
    2839             : /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
    2840             : /// physical register defs.
    2841           4 : static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
    2842             :                                   const TargetInstrInfo *TII,
    2843             :                                   const TargetRegisterInfo *TRI) {
    2844           4 :   SDNode *N = SuccSU->getNode();
    2845          12 :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2846           4 :   const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
    2847             :   assert(ImpDefs && "Caller should check hasPhysRegDefs");
    2848           4 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
    2849             :        SUNode = SUNode->getGluedNode()) {
    2850           8 :     if (!SUNode->isMachineOpcode())
    2851           0 :       continue;
    2852             :     const MCPhysReg *SUImpDefs =
    2853          16 :       TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    2854             :     const uint32_t *SURegMask = getNodeRegMask(SUNode);
    2855           8 :     if (!SUImpDefs && !SURegMask)
    2856           0 :       continue;
    2857          24 :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2858             :       MVT VT = N->getSimpleValueType(i);
    2859           8 :       if (VT == MVT::Glue || VT == MVT::Other)
    2860           0 :         continue;
    2861           8 :       if (!N->hasAnyUseOfValue(i))
    2862           0 :         continue;
    2863           8 :       unsigned Reg = ImpDefs[i - NumDefs];
    2864          12 :       if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
    2865             :         return true;
    2866           4 :       if (!SUImpDefs)
    2867           0 :         continue;
    2868          12 :       for (;*SUImpDefs; ++SUImpDefs) {
    2869           4 :         unsigned SUReg = *SUImpDefs;
    2870           4 :         if (TRI->regsOverlap(Reg, SUReg))
    2871             :           return true;
    2872             :       }
    2873             :     }
    2874             :   }
    2875             :   return false;
    2876             : }
    2877             : 
    2878             : /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
    2879             : /// are not handled well by the general register pressure reduction
    2880             : /// heuristics. When presented with code like this:
    2881             : ///
    2882             : ///      N
    2883             : ///    / |
    2884             : ///   /  |
    2885             : ///  U  store
    2886             : ///  |
    2887             : /// ...
    2888             : ///
    2889             : /// the heuristics tend to push the store up, but since the
    2890             : /// operand of the store has another use (U), this would increase
    2891             : /// the length of that other use (the U->N edge).
    2892             : ///
    2893             : /// This function transforms code like the above to route U's
    2894             : /// dependence through the store when possible, like this:
    2895             : ///
    2896             : ///      N
    2897             : ///      ||
    2898             : ///      ||
    2899             : ///     store
    2900             : ///       |
    2901             : ///       U
    2902             : ///       |
    2903             : ///      ...
    2904             : ///
    2905             : /// This results in the store being scheduled immediately
    2906             : /// after N, which shortens the U->N live range, reducing
    2907             : /// register pressure.
    2908        5251 : void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
    2909             :   // Visit all the nodes in topological order, working top-down.
    2910       42813 :   for (SUnit &SU : *SUnits) {
    2911             :     // For now, only look at nodes with no data successors, such as stores.
    2912             :     // These are especially important, due to the heuristics in
    2913             :     // getNodePriority for nodes with no data successors.
    2914       32311 :     if (SU.NumSuccs != 0)
    2915       19703 :       continue;
    2916             :     // For now, only look at nodes with exactly one data predecessor.
    2917       12608 :     if (SU.NumPreds != 1)
    2918        7637 :       continue;
    2919             :     // Avoid prescheduling copies to virtual registers, which don't behave
    2920             :     // like other nodes from the perspective of scheduling heuristics.
    2921        4971 :     if (SDNode *N = SU.getNode())
    2922        5789 :       if (N->getOpcode() == ISD::CopyToReg &&
    2923             :           TargetRegisterInfo::isVirtualRegister
    2924        1636 :             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2925         781 :         continue;
    2926             : 
    2927             :     // Locate the single data predecessor.
    2928             :     SUnit *PredSU = nullptr;
    2929        5722 :     for (const SDep &Pred : SU.Preds)
    2930        4956 :       if (!Pred.isCtrl()) {
    2931             :         PredSU = Pred.getSUnit();
    2932        4190 :         break;
    2933             :       }
    2934             :     assert(PredSU);
    2935             : 
    2936             :     // Don't rewrite edges that carry physregs, because that requires additional
    2937             :     // support infrastructure.
    2938        4190 :     if (PredSU->hasPhysRegDefs)
    2939           8 :       continue;
    2940             :     // Short-circuit the case where SU is PredSU's only data successor.
    2941        4182 :     if (PredSU->NumSuccs == 1)
    2942        3663 :       continue;
    2943             :     // Avoid prescheduling to copies from virtual registers, which don't behave
    2944             :     // like other nodes from the perspective of scheduling heuristics.
    2945         519 :     if (SDNode *N = SU.getNode())
    2946         525 :       if (N->getOpcode() == ISD::CopyFromReg &&
     2947             :           TargetRegisterInfo::isVirtualRegister(
     2948          12 :             cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2949           0 :         continue;
    2950             : 
    2951             :     // Perform checks on the successors of PredSU.
    2952        1291 :     for (const SDep &PredSucc : PredSU->Succs) {
    2953             :       SUnit *PredSuccSU = PredSucc.getSUnit();
    2954         848 :       if (PredSuccSU == &SU) continue;
    2955             :       // If PredSU has another successor with no data successors, for
    2956             :       // now don't attempt to choose either over the other.
    2957         545 :       if (PredSuccSU->NumSuccs == 0)
    2958             :         goto outer_loop_continue;
    2959             :       // Don't break physical register dependencies.
    2960         106 :       if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
    2961           4 :         if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
    2962             :           goto outer_loop_continue;
    2963             :       // Don't introduce graph cycles.
    2964         204 :       if (scheduleDAG->IsReachable(&SU, PredSuccSU))
    2965             :         goto outer_loop_continue;
    2966             :     }
    2967             : 
    2968             :     // Ok, the transformation is safe and the heuristics suggest it is
    2969             :     // profitable. Update the graph.
    2970             :     DEBUG(dbgs() << "    Prescheduling SU #" << SU.NodeNum
    2971             :                  << " next to PredSU #" << PredSU->NodeNum
    2972             :                  << " to guide scheduling in the presence of multiple uses\n");
    2973         492 :     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
    2974         126 :       SDep Edge = PredSU->Succs[i];
    2975             :       assert(!Edge.isAssignedRegDep());
    2976             :       SUnit *SuccSU = Edge.getSUnit();
    2977         126 :       if (SuccSU != &SU) {
    2978             :         Edge.setSUnit(PredSU);
    2979          69 :         scheduleDAG->RemovePred(SuccSU, Edge);
    2980          69 :         scheduleDAG->AddPred(&SU, Edge);
    2981             :         Edge.setSUnit(&SU);
    2982          69 :         scheduleDAG->AddPred(SuccSU, Edge);
    2983          69 :         --i;
    2984             :       }
    2985             :     }
    2986         519 :   outer_loop_continue:;
    2987             :   }
    2988        5251 : }
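
The rewiring loop above mutates PredSU->Succs while iterating over it: RemovePred erases the current edge, which shifts the remaining successors down, so the --i compensates before the loop's ++i. The net effect is that every successor of PredSU other than SU is re-routed to depend on SU instead. Below is a minimal sketch of that re-routing on a toy graph; the Node type and edge helpers are hypothetical, not LLVM's SUnit/SDep API, and the sketch omits the PredSU->SU edge that the real code re-adds to preserve the original edge's kind and latency.

    // Toy dependence graph; hypothetical types, not LLVM's.
    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node *> Preds, Succs;
    };

    static void removeEdge(Node *From, Node *To) {
      From->Succs.erase(std::find(From->Succs.begin(), From->Succs.end(), To));
      To->Preds.erase(std::find(To->Preds.begin(), To->Preds.end(), From));
    }

    static void addEdge(Node *From, Node *To) {
      From->Succs.push_back(To);
      To->Preds.push_back(From);
    }

    // Route every successor of Pred other than SU through SU, mirroring the
    // loop in PrescheduleNodesWithMultipleUses. Erasing the current edge
    // shifts the remaining successors down, hence the --i before the ++i.
    static void rerouteThrough(Node *Pred, Node *SU) {
      for (unsigned i = 0; i != Pred->Succs.size(); ++i) {
        Node *Succ = Pred->Succs[i];
        if (Succ != SU) {
          removeEdge(Pred, Succ);
          addEdge(SU, Succ);
          --i;
        }
      }
    }

    int main() {
      Node N, U, Store;
      addEdge(&N, &U);      // the U->N data edge from the comment's diagram
      addEdge(&N, &Store);  // the store's use of N
      rerouteThrough(&N, &Store);
      assert(N.Succs.size() == 1 && N.Succs[0] == &Store);
      assert(Store.Succs.size() == 1 && Store.Succs[0] == &U);
    }

The IsReachable check earlier in the function is what makes this mutation safe: the transformation is skipped whenever PredSuccSU can already reach SU, so adding the SU->SuccSU edges cannot create a cycle.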
    2989             : 
     2990             : /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
     2991             : /// it as a def&use operand, add a pseudo control edge from it to the other
     2992             : /// node (if it won't create a cycle) so the two-address one will be scheduled
     2993             : /// first (lower in the schedule). If both nodes are two-address, favor the
     2994             : /// one that has a CopyToReg use (more likely to be a loop induction update).
     2995             : /// If both are two-address but only one is commutable, favor the one that is
     2996             : /// not commutable.
    2997           0 : void RegReductionPQBase::AddPseudoTwoAddrDeps() {
    2998           0 :   for (SUnit &SU : *SUnits) {
    2999           0 :     if (!SU.isTwoAddress)
    3000           0 :       continue;
    3001             : 
    3002           0 :     SDNode *Node = SU.getNode();
     3003           0 :     if (!Node || !Node->isMachineOpcode() || Node->getGluedNode())
    3004           0 :       continue;
    3005             : 
    3006           0 :     bool isLiveOut = hasOnlyLiveOutUses(&SU);
    3007             :     unsigned Opc = Node->getMachineOpcode();
    3008           0 :     const MCInstrDesc &MCID = TII->get(Opc);
    3009           0 :     unsigned NumRes = MCID.getNumDefs();
    3010           0 :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    3011           0 :     for (unsigned j = 0; j != NumOps; ++j) {
    3012           0 :       if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
    3013           0 :         continue;
     3014           0 :       SDNode *DU = Node->getOperand(j).getNode();
    3015           0 :       if (DU->getNodeId() == -1)
    3016           0 :         continue;
    3017           0 :       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
    3018           0 :       if (!DUSU)
    3019           0 :         continue;
    3020           0 :       for (const SDep &Succ : DUSU->Succs) {
    3021           0 :         if (Succ.isCtrl())
    3022           0 :           continue;
    3023             :         SUnit *SuccSU = Succ.getSUnit();
    3024           0 :         if (SuccSU == &SU)
    3025           0 :           continue;
    3026             :         // Be conservative. Ignore if nodes aren't at roughly the same
    3027             :         // depth and height.
    3028           0 :         if (SuccSU->getHeight() < SU.getHeight() &&
    3029           0 :             (SU.getHeight() - SuccSU->getHeight()) > 1)
    3030           0 :           continue;
    3031             :         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
    3032             :         // constrains whatever is using the copy, instead of the copy
    3033             :         // itself. In the case that the copy is coalesced, this
     3034             :         // preserves the intent of the pseudo two-address heuristics.
    3035           0 :         while (SuccSU->Succs.size() == 1 &&
    3036           0 :                SuccSU->getNode()->isMachineOpcode() &&
    3037             :                SuccSU->getNode()->getMachineOpcode() ==
    3038             :                  TargetOpcode::COPY_TO_REGCLASS)
    3039             :           SuccSU = SuccSU->Succs.front().getSUnit();
    3040             :         // Don't constrain non-instruction nodes.
    3041           0 :         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
    3042           0 :           continue;
    3043             :         // Don't constrain nodes with physical register defs if the
    3044             :         // predecessor can clobber them.
    3045           0 :         if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
    3046           0 :           if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
    3047           0 :             continue;
    3048             :         }
    3049             :         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
    3050             :         // these may be coalesced away. We want them close to their uses.
    3051           0 :         unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
    3052           0 :         if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
    3053           0 :             SuccOpc == TargetOpcode::INSERT_SUBREG ||
    3054           0 :             SuccOpc == TargetOpcode::SUBREG_TO_REG)
    3055           0 :           continue;
    3056           0 :         if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
    3057           0 :             (!canClobber(SuccSU, DUSU) ||
    3058           0 :              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
    3059           0 :              (!SU.isCommutable && SuccSU->isCommutable)) &&
    3060           0 :             !scheduleDAG->IsReachable(SuccSU, &SU)) {
    3061             :           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
    3062             :                        << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
    3063           0 :           scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
    3064             :         }
    3065             :       }
    3066             :     }
    3067             :   }
    3068           0 : }
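
The j-loop above identifies def&use operands by asking MCInstrDesc for a TIED_TO constraint on each use operand (indexed past the defs). A minimal sketch of that query against a hand-rolled descriptor follows; ToyInstrDesc and its layout are hypothetical stand-ins for MCInstrDesc/MCOI, and only the indexing mirrors the real scan.

    #include <cstdio>
    #include <vector>

    struct ToyInstrDesc {
      unsigned NumDefs;
      std::vector<int> TiedTo; // per operand, defs first: tied def index or -1
    };

    // Walk the use operands (j runs over NumOps, offset past the defs) and
    // report the ones tied to a def, i.e. the def&use operands that make an
    // instruction two-address.
    static void printTiedUses(const ToyInstrDesc &D) {
      unsigned NumOps = static_cast<unsigned>(D.TiedTo.size()) - D.NumDefs;
      for (unsigned j = 0; j != NumOps; ++j) {
        int Tied = D.TiedTo[j + D.NumDefs];
        if (Tied == -1)
          continue;
        std::printf("use operand %u is tied to def %d (two-address)\n", j, Tied);
      }
    }

    int main() {
      // Shaped like a classic two-address add: one def, two uses, the first
      // use tied to the def.
      ToyInstrDesc Add = {1, {-1, 0, -1}};
      printTiedUses(Add);
    }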
    3069             : 
    3070             : //===----------------------------------------------------------------------===//
    3071             : //                         Public Constructor Functions
    3072             : //===----------------------------------------------------------------------===//
    3073             : 
    3074             : ScheduleDAGSDNodes *
    3075        5251 : llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
    3076             :                                  CodeGenOpt::Level OptLevel) {
    3077        5251 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3078        5251 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3079        5251 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3080             : 
    3081             :   BURegReductionPriorityQueue *PQ =
    3082        5251 :     new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
    3083        5251 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3084             :   PQ->setScheduleDAG(SD);
    3085        5251 :   return SD;
    3086             : }
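
Factories like createBURRListDAGScheduler are plugged into instruction selection through SchedulerRegistry. A registration looks roughly like the sketch below; the actual registration lives earlier in this file, outside this excerpt, and the name and description strings here follow the conventional LLVM spellings rather than quoting it.

    // Sketch: exposing the factory above as a scheduler choice
    // (assumes llvm/CodeGen/SchedulerRegistry.h).
    static RegisterScheduler
      burrListDAGScheduler("list-burr",
                           "Bottom-up register reduction list scheduling",
                           createBURRListDAGScheduler);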
    3087             : 
    3088             : ScheduleDAGSDNodes *
    3089      276944 : llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
    3090             :                                    CodeGenOpt::Level OptLevel) {
    3091      276944 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3092      276944 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3093      276944 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3094             : 
    3095             :   SrcRegReductionPriorityQueue *PQ =
    3096      276944 :     new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
    3097      276944 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3098             :   PQ->setScheduleDAG(SD);
    3099      276943 :   return SD;
    3100             : }
    3101             : 
    3102             : ScheduleDAGSDNodes *
    3103       12513 : llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
    3104             :                                    CodeGenOpt::Level OptLevel) {
    3105       12513 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3106       12513 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3107       12513 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3108       12513 :   const TargetLowering *TLI = IS->TLI;
    3109             : 
    3110             :   HybridBURRPriorityQueue *PQ =
    3111       12513 :     new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3112             : 
    3113       12513 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3114             :   PQ->setScheduleDAG(SD);
    3115       12513 :   return SD;
    3116             : }
    3117             : 
    3118             : ScheduleDAGSDNodes *
    3119       15440 : llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
    3120             :                                 CodeGenOpt::Level OptLevel) {
    3121       15440 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3122       15440 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3123       15440 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3124       15440 :   const TargetLowering *TLI = IS->TLI;
    3125             : 
    3126             :   ILPBURRPriorityQueue *PQ =
    3127       15440 :     new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3128       15440 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3129             :   PQ->setScheduleDAG(SD);
    3130       15440 :   return SD;
    3131      245058 : }
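
Once registered, each scheduler can be selected per run. Assuming the conventional registration names (list-burr, source, list-hybrid, list-ilp), a typical invocation is:

    llc -pre-RA-sched=source foo.ll -o foo.s

Judging from the constructor arguments visible above, the hybrid and ILP variants enable the extra tracking flags and supply the TargetLowering, while the BURR and source variants pass nullptr; the source variant additionally requests source-order tie-breaking in its priority queue.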

Generated by: LCOV version 1.13