LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - ScheduleDAGRRList.cpp (source / functions)
Test: llvm-toolchain.info          Date: 2018-10-20 13:21:21

                 Hit    Total    Coverage
Lines:           944     1150      82.1 %
Functions:        70       97      72.2 %

          Line data    Source code
       1             : //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This implements bottom-up and top-down register pressure reduction list
      11             : // schedulers, using standard algorithms.  The basic approach uses a priority
      12             : // queue of available nodes to schedule.  One at a time, nodes are taken from
      13             : // the priority queue (thus in priority order), checked for legality to
      14             : // schedule, and emitted if legal.
      15             : //
      16             : //===----------------------------------------------------------------------===//
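// Illustrative sketch (not part of this file): the basic list-scheduling
// loop described above, reduced to a toy bottom-up scheduler over a plain
// dependence DAG. ToyUnit, ByPriority, and listScheduleBottomUp are
// hypothetical stand-ins for SUnit, the priority-queue heuristics, and
// ListScheduleBottomUp; legality checks, hazards, and register tracking
// are omitted.

#include <queue>
#include <vector>

struct ToyUnit {
  int Priority = 0;                 // e.g. a Sethi-Ullman number
  int NumSuccsLeft = 0;             // successors not yet scheduled
  std::vector<ToyUnit *> Preds;     // predecessors in the dependence DAG
};

struct ByPriority {
  bool operator()(const ToyUnit *A, const ToyUnit *B) const {
    return A->Priority < B->Priority; // pop the highest priority first
  }
};

std::vector<ToyUnit *> listScheduleBottomUp(std::vector<ToyUnit> &Units) {
  std::priority_queue<ToyUnit *, std::vector<ToyUnit *>, ByPriority> Avail;
  for (ToyUnit &U : Units)          // seed: units with no unscheduled succs
    if (U.NumSuccsLeft == 0)
      Avail.push(&U);
  std::vector<ToyUnit *> Seq;
  while (!Avail.empty()) {
    ToyUnit *U = Avail.top();       // take nodes in priority order
    Avail.pop();
    Seq.push_back(U);               // "emit" the node
    for (ToyUnit *P : U->Preds)     // release predecessors whose successors
      if (--P->NumSuccsLeft == 0)   // are now all scheduled
        Avail.push(P);
  }
  return Seq;                       // bottom-up order; reverse for emission
}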
      17             : 
      18             : #include "ScheduleDAGSDNodes.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/DenseMap.h"
      21             : #include "llvm/ADT/STLExtras.h"
      22             : #include "llvm/ADT/SmallSet.h"
      23             : #include "llvm/ADT/SmallVector.h"
      24             : #include "llvm/ADT/Statistic.h"
      25             : #include "llvm/CodeGen/ISDOpcodes.h"
      26             : #include "llvm/CodeGen/MachineFunction.h"
      27             : #include "llvm/CodeGen/MachineOperand.h"
      28             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      29             : #include "llvm/CodeGen/ScheduleDAG.h"
      30             : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
      31             : #include "llvm/CodeGen/SchedulerRegistry.h"
      32             : #include "llvm/CodeGen/SelectionDAGISel.h"
      33             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      34             : #include "llvm/CodeGen/TargetInstrInfo.h"
      35             : #include "llvm/CodeGen/TargetLowering.h"
      36             : #include "llvm/CodeGen/TargetOpcodes.h"
      37             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      39             : #include "llvm/Config/llvm-config.h"
      40             : #include "llvm/IR/InlineAsm.h"
      41             : #include "llvm/MC/MCInstrDesc.h"
      42             : #include "llvm/MC/MCRegisterInfo.h"
      43             : #include "llvm/Support/Casting.h"
      44             : #include "llvm/Support/CodeGen.h"
      45             : #include "llvm/Support/CommandLine.h"
      46             : #include "llvm/Support/Compiler.h"
      47             : #include "llvm/Support/Debug.h"
      48             : #include "llvm/Support/ErrorHandling.h"
      49             : #include "llvm/Support/MachineValueType.h"
      50             : #include "llvm/Support/raw_ostream.h"
      51             : #include <algorithm>
      52             : #include <cassert>
      53             : #include <cstdint>
      54             : #include <cstdlib>
      55             : #include <iterator>
      56             : #include <limits>
      57             : #include <memory>
      58             : #include <utility>
      59             : #include <vector>
      60             : 
      61             : using namespace llvm;
      62             : 
      63             : #define DEBUG_TYPE "pre-RA-sched"
      64             : 
      65             : STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
      66             : STATISTIC(NumUnfolds,    "Number of nodes unfolded");
      67             : STATISTIC(NumDups,       "Number of duplicated nodes");
      68             : STATISTIC(NumPRCopies,   "Number of physical register copies");
      69             : 
      70             : static RegisterScheduler
      71             :   burrListDAGScheduler("list-burr",
      72             :                        "Bottom-up register reduction list scheduling",
      73             :                        createBURRListDAGScheduler);
      74             : 
      75             : static RegisterScheduler
      76             :   sourceListDAGScheduler("source",
      77             :                          "Similar to list-burr but schedules in source "
      78             :                          "order when possible",
      79             :                          createSourceListDAGScheduler);
      80             : 
      81             : static RegisterScheduler
      82             :   hybridListDAGScheduler("list-hybrid",
      83             :                          "Bottom-up register pressure aware list scheduling "
      84             :                          "which tries to balance latency and register pressure",
      85             :                          createHybridListDAGScheduler);
      86             : 
      87             : static RegisterScheduler
      88             :   ILPListDAGScheduler("list-ilp",
      89             :                       "Bottom-up register pressure aware list scheduling "
      90             :                       "which tries to balance ILP and register pressure",
      91             :                       createILPListDAGScheduler);
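// Usage note (illustrative): each RegisterScheduler entry above registers a
// value for llc's -pre-RA-sched option, so a particular scheduler can be
// requested on the command line, e.g.:
//   llc -pre-RA-sched=list-burr foo.ll
//   llc -pre-RA-sched=source foo.ll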
      92             : 
      93             : static cl::opt<bool> DisableSchedCycles(
      94             :   "disable-sched-cycles", cl::Hidden, cl::init(false),
      95             :   cl::desc("Disable cycle-level precision during preRA scheduling"));
      96             : 
      97             : // Temporary sched=list-ilp flags until the heuristics are robust.
      98             : // Some options are also available under sched=list-hybrid.
      99             : static cl::opt<bool> DisableSchedRegPressure(
     100             :   "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
     101             :   cl::desc("Disable regpressure priority in sched=list-ilp"));
     102             : static cl::opt<bool> DisableSchedLiveUses(
     103             :   "disable-sched-live-uses", cl::Hidden, cl::init(true),
     104             :   cl::desc("Disable live use priority in sched=list-ilp"));
     105             : static cl::opt<bool> DisableSchedVRegCycle(
     106             :   "disable-sched-vrcycle", cl::Hidden, cl::init(false),
     107             :   cl::desc("Disable virtual register cycle interference checks"));
     108             : static cl::opt<bool> DisableSchedPhysRegJoin(
     109             :   "disable-sched-physreg-join", cl::Hidden, cl::init(false),
     110             :   cl::desc("Disable physreg def-use affinity"));
     111             : static cl::opt<bool> DisableSchedStalls(
     112             :   "disable-sched-stalls", cl::Hidden, cl::init(true),
     113             :   cl::desc("Disable no-stall priority in sched=list-ilp"));
     114             : static cl::opt<bool> DisableSchedCriticalPath(
     115             :   "disable-sched-critical-path", cl::Hidden, cl::init(false),
     116             :   cl::desc("Disable critical path priority in sched=list-ilp"));
     117             : static cl::opt<bool> DisableSchedHeight(
     118             :   "disable-sched-height", cl::Hidden, cl::init(false),
     119             :   cl::desc("Disable scheduled-height priority in sched=list-ilp"));
     120             : static cl::opt<bool> Disable2AddrHack(
     121             :   "disable-2addr-hack", cl::Hidden, cl::init(true),
     122             :   cl::desc("Disable scheduler's two-address hack"));
     123             : 
     124             : static cl::opt<int> MaxReorderWindow(
     125             :   "max-sched-reorder", cl::Hidden, cl::init(6),
     126             :   cl::desc("Number of instructions to allow ahead of the critical path "
     127             :            "in sched=list-ilp"));
     128             : 
     129             : static cl::opt<unsigned> AvgIPC(
     130             :   "sched-avg-ipc", cl::Hidden, cl::init(1),
      131             :   cl::desc("Average inst/cycle when no target itinerary exists."));
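// Usage note (illustrative): the cl::opt flags above are hidden tuning
// knobs and can be combined with the scheduler selection, e.g. to turn off
// the register pressure heuristic in the ILP scheduler:
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.ll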
     132             : 
     133             : namespace {
     134             : 
     135             : //===----------------------------------------------------------------------===//
     136             : /// ScheduleDAGRRList - The actual register reduction list scheduler
     137             : /// implementation.  This supports both top-down and bottom-up scheduling.
     138             : ///
     139             : class ScheduleDAGRRList : public ScheduleDAGSDNodes {
     140             : private:
     141             :   /// NeedLatency - True if the scheduler will make use of latency information.
     142             :   bool NeedLatency;
     143             : 
     144             :   /// AvailableQueue - The priority queue to use for the available SUnits.
     145             :   SchedulingPriorityQueue *AvailableQueue;
     146             : 
     147             :   /// PendingQueue - This contains all of the instructions whose operands have
     148             :   /// been issued, but their results are not ready yet (due to the latency of
      149             :   /// the operation).  Once the operands become available, the instruction is
     150             :   /// added to the AvailableQueue.
     151             :   std::vector<SUnit *> PendingQueue;
     152             : 
     153             :   /// HazardRec - The hazard recognizer to use.
     154             :   ScheduleHazardRecognizer *HazardRec;
     155             : 
     156             :   /// CurCycle - The current scheduler state corresponds to this cycle.
     157             :   unsigned CurCycle = 0;
     158             : 
     159             :   /// MinAvailableCycle - Cycle of the soonest available instruction.
     160             :   unsigned MinAvailableCycle;
     161             : 
      162             :   /// IssueCount - Number of instructions issued in this cycle.
     163             :   /// Currently valid only for bottom-up scheduling.
     164             :   unsigned IssueCount;
     165             : 
      166             :   /// LiveRegDefs - A set of physical registers and their definitions
      167             :   /// that are "live". These nodes must be scheduled before any other nodes
      168             :   /// that modify the registers can be scheduled.
     169             :   unsigned NumLiveRegs;
     170             :   std::unique_ptr<SUnit*[]> LiveRegDefs;
     171             :   std::unique_ptr<SUnit*[]> LiveRegGens;
     172             : 
     173             :   // Collect interferences between physical register use/defs.
      174             :   // Each interference is an SUnit and a set of physical registers.
     175             :   SmallVector<SUnit*, 4> Interferences;
     176             : 
     177             :   using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
     178             : 
     179             :   LRegsMapT LRegsMap;
     180             : 
     181             :   /// Topo - A topological ordering for SUnits which permits fast IsReachable
     182             :   /// and similar queries.
     183             :   ScheduleDAGTopologicalSort Topo;
     184             : 
     185             :   // Hack to keep track of the inverse of FindCallSeqStart without more crazy
     186             :   // DAG crawling.
     187             :   DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
     188             : 
     189             : public:
     190     1269002 :   ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
     191             :                     SchedulingPriorityQueue *availqueue,
     192             :                     CodeGenOpt::Level OptLevel)
     193     1269002 :     : ScheduleDAGSDNodes(mf),
     194             :       NeedLatency(needlatency), AvailableQueue(availqueue),
     195     1269002 :       Topo(SUnits, nullptr) {
     196     1269002 :     const TargetSubtargetInfo &STI = mf.getSubtarget();
     197     1269002 :     if (DisableSchedCycles || !NeedLatency)
     198     1241781 :       HazardRec = new ScheduleHazardRecognizer();
     199             :     else
     200       27221 :       HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
     201     1269002 :   }
     202             : 
     203     3807006 :   ~ScheduleDAGRRList() override {
     204     1269002 :     delete HazardRec;
     205     1269002 :     delete AvailableQueue;
     206     2538004 :   }
     207     1269002 : 
     208             :   void Schedule() override;
     209             : 
     210     1269002 :   ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
     211     2538004 : 
     212     1269002 :   /// IsReachable - Checks if SU is reachable from TargetSU.
     213     1269002 :   bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
     214     1269002 :     return Topo.IsReachable(SU, TargetSU);
     215             :   }
     216             : 
     217             :   /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
     218           0 :   /// create a cycle.
     219             :   bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
     220             :     return Topo.WillCreateCycle(SU, TargetSU);
     221             :   }
     222           0 : 
      223             :   /// AddPred - Adds a predecessor edge to SUnit SU.
      224             :   /// Updates the topological ordering if required.
     226             :   void AddPred(SUnit *SU, const SDep &D) {
     227             :     Topo.AddPred(SU, D.getSUnit());
     228        3423 :     SU->addPred(D);
     229             :   }
     230             : 
      231             :   /// RemovePred - Removes a predecessor edge from SUnit SU.
      232             :   /// Updates the topological ordering if required.
     234        3600 :   void RemovePred(SUnit *SU, const SDep &D) {
     235        7200 :     Topo.RemovePred(SU, D.getSUnit());
     236        3600 :     SU->removePred(D);
     237        3600 :   }
     238             : 
     239             : private:
     240             :   bool isReady(SUnit *SU) {
     241             :     return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
     242        1344 :       AvailableQueue->isReady(SU);
     243        2688 :   }
     244        1344 : 
     245        1344 :   void ReleasePred(SUnit *SU, const SDep *PredEdge);
     246             :   void ReleasePredecessors(SUnit *SU);
     247             :   void ReleasePending();
     248           0 :   void AdvanceToCycle(unsigned NextCycle);
     249    15049218 :   void AdvancePastStalls(SUnit *SU);
     250           0 :   void EmitNode(SUnit *SU);
     251             :   void ScheduleNodeBottomUp(SUnit*);
     252             :   void CapturePred(SDep *PredEdge);
     253             :   void UnscheduleNodeBottomUp(SUnit*);
     254             :   void RestoreHazardCheckerBottomUp();
     255             :   void BacktrackBottomUp(SUnit*, SUnit*);
     256             :   SUnit *TryUnfoldSU(SUnit *);
     257             :   SUnit *CopyAndMoveSuccessors(SUnit*);
     258             :   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
     259             :                                 const TargetRegisterClass*,
     260             :                                 const TargetRegisterClass*,
     261             :                                 SmallVectorImpl<SUnit*>&);
     262             :   bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
     263             : 
     264             :   void releaseInterferences(unsigned Reg = 0);
     265             : 
     266             :   SUnit *PickNodeToScheduleBottomUp();
     267             :   void ListScheduleBottomUp();
     268             : 
     269             :   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
     270             :   /// Updates the topological ordering if required.
     271             :   SUnit *CreateNewSUnit(SDNode *N) {
     272             :     unsigned NumSUnits = SUnits.size();
     273             :     SUnit *NewNode = newSUnit(N);
     274             :     // Update the topological ordering.
     275             :     if (NewNode->NodeNum >= NumSUnits)
     276             :       Topo.InitDAGTopologicalSorting();
     277             :     return NewNode;
     278             :   }
     279         376 : 
     280         376 :   /// CreateClone - Creates a new SUnit from an existing one.
     281         376 :   /// Updates the topological ordering if required.
     282             :   SUnit *CreateClone(SUnit *N) {
     283         376 :     unsigned NumSUnits = SUnits.size();
     284         376 :     SUnit *NewNode = Clone(N);
     285         376 :     // Update the topological ordering.
     286             :     if (NewNode->NodeNum >= NumSUnits)
     287             :       Topo.InitDAGTopologicalSorting();
     288             :     return NewNode;
     289             :   }
     290         274 : 
     291         274 :   /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
     292         274 :   /// need actual latency information but the hybrid scheduler does.
     293             :   bool forceUnitLatencies() const override {
     294         274 :     return !NeedLatency;
     295         274 :   }
     296         274 : };
     297             : 
     298             : }  // end anonymous namespace
     299             : 
     300             : /// GetCostForDef - Looks up the register class and cost for a given definition.
     301    16340128 : /// Typically this just means looking up the representative register class,
     302    36122992 : /// but for untyped values (MVT::Untyped) it means inspecting the node's
     303             : /// opcode to determine what register class is being generated.
     304             : static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
     305             :                           const TargetLowering *TLI,
     306             :                           const TargetInstrInfo *TII,
     307             :                           const TargetRegisterInfo *TRI,
     308             :                           unsigned &RegClass, unsigned &Cost,
     309             :                           const MachineFunction &MF) {
     310             :   MVT VT = RegDefPos.GetValue();
     311             : 
     312      491120 :   // Special handling for untyped values.  These values can only come from
     313             :   // the expansion of custom DAG-to-DAG patterns.
     314             :   if (VT == MVT::Untyped) {
     315             :     const SDNode *Node = RegDefPos.GetNode();
     316             : 
     317             :     // Special handling for CopyFromReg of untyped values.
     318             :     if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
     319             :       unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     320             :       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
     321             :       RegClass = RC->getID();
     322      491120 :       Cost = 1;
     323        1976 :       return;
     324             :     }
     325             : 
     326        1976 :     unsigned Opcode = Node->getMachineOpcode();
     327         160 :     if (Opcode == TargetOpcode::REG_SEQUENCE) {
     328         160 :       unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
     329         160 :       const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
     330         160 :       RegClass = RC->getID();
     331         421 :       Cost = 1;
     332             :       return;
     333             :     }
     334             : 
     335        1816 :     unsigned Idx = RegDefPos.GetIdx();
     336         522 :     const MCInstrDesc Desc = TII->get(Opcode);
     337         261 :     const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
     338         261 :     RegClass = RC->getID();
     339         261 :     // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
     340         261 :     // better way to determine it.
     341             :     Cost = 1;
     342             :   } else {
     343        1555 :     RegClass = TLI->getRepRegClassFor(VT)->getID();
     344        1555 :     Cost = TLI->getRepRegClassCostFor(VT);
     345        1555 :   }
     346        1555 : }
     347             : 
     348             : /// Schedule - Schedule the DAG using list scheduling.
     349        1555 : void ScheduleDAGRRList::Schedule() {
     350             :   LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
     351      489144 :                     << " '" << BB->getName() << "' **********\n");
     352      489144 : 
     353             :   CurCycle = 0;
     354             :   IssueCount = 0;
     355             :   MinAvailableCycle =
     356             :       DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
     357     1269002 :   NumLiveRegs = 0;
     358             :   // Allocate slots for each physical register, plus one for a special register
     359             :   // to track the virtual resource of a calling sequence.
     360             :   LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
     361     1269002 :   LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
     362     1269002 :   CallSeqEndForStart.clear();
     363     1269002 :   assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
     364     1269002 : 
     365     1269002 :   // Build the scheduling graph.
     366             :   BuildSchedGraph(nullptr);
     367             : 
     368   399161731 :   LLVM_DEBUG(dump());
     369   399161731 :   Topo.InitDAGTopologicalSorting();
     370     1269002 : 
     371             :   AvailableQueue->initNodes(SUnits);
     372             : 
     373             :   HazardRec->Reset();
     374     1269002 : 
     375             :   // Execute the actual scheduling loop.
     376             :   ListScheduleBottomUp();
     377     1269002 : 
     378             :   AvailableQueue->releaseState();
     379     1269002 : 
     380             :   LLVM_DEBUG({
     381     1269002 :     dbgs() << "*** Final schedule ***\n";
     382             :     dumpSchedule();
     383             :     dbgs() << '\n';
     384     1269002 :   });
     385             : }
     386     1269001 : 
     387             : //===----------------------------------------------------------------------===//
     388             : //  Bottom-Up Scheduling
     389             : //===----------------------------------------------------------------------===//
     390             : 
     391             : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
     392             : /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
     393     1269001 : void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
     394             :   SUnit *PredSU = PredEdge->getSUnit();
     395             : 
     396             : #ifndef NDEBUG
     397             :   if (PredSU->NumSuccsLeft == 0) {
     398             :     dbgs() << "*** Scheduling failed! ***\n";
     399             :     dumpNode(*PredSU);
     400             :     dbgs() << " has been released too many times!\n";
     401    19782864 :     llvm_unreachable(nullptr);
     402    19782864 :   }
     403             : #endif
     404             :   --PredSU->NumSuccsLeft;
     405             : 
     406             :   if (!forceUnitLatencies()) {
     407             :     // Updating predecessor's height. This is now the cycle when the
     408             :     // predecessor can be scheduled without causing a pipeline stall.
     409             :     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
     410             :   }
     411             : 
     412    19782864 :   // If all the node's successors are scheduled, this node is ready
     413             :   // to be scheduled. Ignore the special EntrySU node.
     414    19782864 :   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
     415             :     PredSU->isAvailable = true;
     416             : 
     417      229184 :     unsigned Height = PredSU->getHeight();
     418             :     if (Height < MinAvailableCycle)
     419             :       MinAvailableCycle = Height;
     420             : 
     421             :     if (isReady(PredSU)) {
     422    19782864 :       AvailableQueue->push(PredSU);
     423    15049218 :     }
      424             :     // CapturePred and others may have left the node in the pending queue;
      425             :     // avoid adding it twice.
     426    15049218 :     else if (!PredSU->isPending) {
     427     8912324 :       PredSU->isPending = true;
     428             :       PendingQueue.push_back(PredSU);
     429    15049218 :     }
     430    15049218 :   }
     431             : }
     432             : 
     433             : /// IsChainDependent - Test if Outer is reachable from Inner through
     434           0 : /// chain dependencies.
     435           0 : static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
     436           0 :                              unsigned NestLevel,
     437             :                              const TargetInstrInfo *TII) {
     438             :   SDNode *N = Outer;
     439    19782864 :   while (true) {
     440             :     if (N == Inner)
     441             :       return true;
     442             :     // For a TokenFactor, examine each operand. There may be multiple ways
     443        2319 :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     444             :     // most nesting in order to ensure that we find the corresponding match.
     445             :     if (N->getOpcode() == ISD::TokenFactor) {
     446             :       for (const SDValue &Op : N->op_values())
     447             :         if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
     448        4712 :           return true;
     449             :       return false;
     450             :     }
     451             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     452             :     if (N->isMachineOpcode()) {
     453        4711 :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     454        1754 :         ++NestLevel;
     455        1330 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     456             :         if (NestLevel == 0)
     457             :           return false;
     458             :         --NestLevel;
     459             :       }
     460        4287 :     }
     461        3746 :     // Otherwise, find the chain and continue climbing.
     462           0 :     for (const SDValue &Op : N->op_values())
     463        3746 :       if (Op.getValueType() == MVT::Other) {
     464        1894 :         N = Op.getNode();
     465             :         goto found_chain_operand;
     466           0 :       }
     467             :     return false;
     468             :   found_chain_operand:;
     469             :     if (N->getOpcode() == ISD::EntryToken)
     470       11700 :       return false;
     471       11700 :   }
     472             : }
     473             : 
     474             : /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
     475             : /// the corresponding (lowered) CALLSEQ_BEGIN node.
     476             : ///
      477        2393 : /// NestLevel and MaxNest are used in recursion to indicate the current level
     478             : /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
     479             : /// level seen so far.
     480             : ///
     481             : /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
     482             : /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
     483             : static SDNode *
     484             : FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
     485             :                  const TargetInstrInfo *TII) {
     486             :   while (true) {
     487             :     // For a TokenFactor, examine each operand. There may be multiple ways
     488             :     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     489             :     // most nesting in order to ensure that we find the corresponding match.
     490             :     if (N->getOpcode() == ISD::TokenFactor) {
     491             :       SDNode *Best = nullptr;
     492     1114591 :       unsigned BestMaxNest = MaxNest;
     493             :       for (const SDValue &Op : N->op_values()) {
     494             :         unsigned MyNestLevel = NestLevel;
     495             :         unsigned MyMaxNest = MaxNest;
     496             :         if (SDNode *New = FindCallSeqStart(Op.getNode(),
     497             :                                            MyNestLevel, MyMaxNest, TII))
     498     5347447 :           if (!Best || (MyMaxNest > BestMaxNest)) {
     499             :             Best = New;
     500       41849 :             BestMaxNest = MyMaxNest;
     501      157127 :           }
     502      115278 :       }
     503      115278 :       assert(Best);
     504      115278 :       MaxNest = BestMaxNest;
     505             :       return Best;
     506      115241 :     }
     507             :     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
     508       41849 :     if (N->isMachineOpcode()) {
     509             :       if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     510             :         ++NestLevel;
     511             :         MaxNest = std::max(MaxNest, NestLevel);
     512       41849 :       } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     513       41849 :         assert(NestLevel != 0);
     514             :         --NestLevel;
     515             :         if (NestLevel == 0)
     516     5305598 :           return N;
     517     3246347 :       }
     518      999314 :     }
     519      999314 :     // Otherwise, find the chain and continue climbing.
     520     2247033 :     for (const SDValue &Op : N->op_values())
     521             :       if (Op.getValueType() == MVT::Other) {
     522     1072706 :         N = Op.getNode();
     523     1072706 :         goto found_chain_operand;
     524     1072705 :       }
     525             :     return nullptr;
     526             :   found_chain_operand:;
     527             :     if (N->getOpcode() == ISD::EntryToken)
     528    11379035 :       return nullptr;
     529    11379035 :   }
     530             : }
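// Illustrative trace (not from this file) of FindCallSeqStart above: climbing
// the chain backward from the outer CALLSEQ_END of a nested sequence
//   CALLSEQ_BEGIN(a) ... CALLSEQ_BEGIN(b) ... CALLSEQ_END(b) ... CALLSEQ_END(a)
// NestLevel goes 0->1 at CALLSEQ_END(a), 1->2 at CALLSEQ_END(b), drops to 1 at
// CALLSEQ_BEGIN(b), and reaches 0 at CALLSEQ_BEGIN(a), which is returned.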
     531             : 
     532             : /// Call ReleasePred for each predecessor, then update register live def/gen.
     533             : /// Always update LiveRegDefs for a register dependence even if the current SU
      534             : /// also defines the register. This effectively creates one large live range
      535     4232893 : /// across a sequence of two-address nodes. This is important because the
     536             : /// entire chain must be scheduled together. Example:
     537             : ///
     538             : /// flags = (3) add
     539             : /// flags = (2) addc flags
     540             : /// flags = (1) addc flags
     541             : ///
     542             : /// results in
     543             : ///
     544             : /// LiveRegDefs[flags] = 3
     545             : /// LiveRegGens[flags] = 1
     546             : ///
     547             : /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
     548             : /// interference on flags.
     549             : void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
     550             :   // Bottom up: release predecessors
     551             :   for (SDep &Pred : SU->Preds) {
     552             :     ReleasePred(SU, &Pred);
     553             :     if (Pred.isAssignedRegDep()) {
     554             :       // This is a physical register dependency and it's impossible or
     555             :       // expensive to copy the register. Make sure nothing that can
     556             :       // clobber the register is scheduled between the predecessor and
     557    17587990 :       // this node.
     558             :       SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
     559    37370852 :       assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
     560    19782864 :              "interference on register dependence");
     561             :       LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
     562             :       if (!LiveRegGens[Pred.getReg()]) {
     563             :         ++NumLiveRegs;
     564             :         LiveRegGens[Pred.getReg()] = SU;
     565             :       }
     566      228489 :     }
     567             :   }
     568             : 
     569      228489 :   // If we're scheduling a lowered CALLSEQ_END, find the corresponding
     570      228489 :   // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
     571      223485 :   // these nodes, to prevent other calls from being interscheduled with them.
     572      446970 :   unsigned CallResource = TRI->getNumRegs();
     573             :   if (!LiveRegDefs[CallResource])
     574             :     for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
     575             :       if (Node->isMachineOpcode() &&
     576             :           Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     577             :         unsigned NestLevel = 0;
     578             :         unsigned MaxNest = 0;
     579             :         SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
     580    17587988 :         assert(N && "Must find call sequence start");
     581    35175976 : 
     582    16046011 :         SUnit *Def = &SUnits[N->getNodeId()];
     583    14776870 :         CallSeqEndForStart[Def] = SU;
     584     8899277 : 
     585      999313 :         ++NumLiveRegs;
     586      999313 :         LiveRegDefs[CallResource] = Def;
     587      999313 :         LiveRegGens[CallResource] = SU;
     588             :         break;
     589             :       }
     590      999313 : }
     591      999313 : 
     592             : /// Check to see if any of the pending instructions are ready to issue.  If
     593      999313 : /// so, add them to the available queue.
     594      999313 : void ScheduleDAGRRList::ReleasePending() {
     595      999313 :   if (DisableSchedCycles) {
     596             :     assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
     597             :     return;
     598    17587988 :   }
     599             : 
     600             :   // If the available queue is empty, it is safe to reset MinAvailableCycle.
     601             :   if (AvailableQueue->empty())
     602    16288428 :     MinAvailableCycle = std::numeric_limits<unsigned>::max();
     603    16288428 : 
     604             :   // Check to see if any of the pending instructions are ready to issue.  If
     605             :   // so, add them to the available queue.
     606             :   for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
     607             :     unsigned ReadyCycle = PendingQueue[i]->getHeight();
     608             :     if (ReadyCycle < MinAvailableCycle)
     609    16288428 :       MinAvailableCycle = ReadyCycle;
     610    10078210 : 
     611             :     if (PendingQueue[i]->isAvailable) {
     612             :       if (!isReady(PendingQueue[i]))
     613             :           continue;
     614    32576856 :       AvailableQueue->push(PendingQueue[i]);
     615           0 :     }
     616           0 :     PendingQueue[i]->isPending = false;
     617           0 :     PendingQueue[i] = PendingQueue.back();
     618             :     PendingQueue.pop_back();
     619           0 :     --i; --e;
     620             :   }
     621             : }
     622           0 : 
      623             : /// Move the scheduler state forward to the specified cycle.
     624           0 : void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
     625           0 :   if (NextCycle <= CurCycle)
     626             :     return;
     627           0 : 
     628             :   IssueCount = 0;
     629             :   AvailableQueue->setCurCycle(NextCycle);
     630             :   if (!HazardRec->isEnabled()) {
     631             :     // Bypass lots of virtual calls in case of long latency.
     632    47880727 :     CurCycle = NextCycle;
     633    47880727 :   }
     634             :   else {
     635             :     for (; CurCycle != NextCycle; ++CurCycle) {
     636    16287476 :       HazardRec->RecedeCycle();
     637    16287476 :     }
     638    16287476 :   }
     639             :   // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
     640    16236782 :   // available Q to release pending nodes at least once before popping.
     641             :   ReleasePending();
     642             : }
     643      168868 : 
     644      118174 : /// Move the scheduler state forward until the specified node's dependents are
     645             : /// ready and can be scheduled with no resource conflicts.
     646             : void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
     647             :   if (DisableSchedCycles)
     648             :     return;
     649    16287476 : 
     650             :   // FIXME: Nodes such as CopyFromReg probably should not advance the current
     651             :   // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
     652             :   // has predecessors the cycle will be advanced when they are scheduled.
     653             :   // But given the crude nature of modeling latency though such nodes, we
     654    16318988 :   // currently need to treat these nodes like real instructions.
     655    16318988 :   // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
     656             : 
     657             :   unsigned ReadyCycle = SU->getHeight();
     658             : 
     659             :   // Bump CurCycle to account for latency. We assume the latency of other
     660             :   // available instructions may be hidden by the stall (not a full pipe stall).
     661             :   // This updates the hazard recognizer's cycle before reserving resources for
     662             :   // this instruction.
     663             :   AdvanceToCycle(ReadyCycle);
     664             : 
     665             :   // Calls are scheduled in their preceding cycle, so don't conflict with
     666             :   // hazards from instructions after the call. EmitNode will reset the
     667             :   // scoreboard state before emitting the call.
     668             :   if (SU->isCall)
     669             :     return;
     670             : 
     671    16318987 :   // FIXME: For resource conflicts in very long non-pipelined stages, we
     672             :   // should probably skip ahead here to avoid useless scoreboard checks.
     673             :   int Stalls = 0;
     674             :   while (true) {
     675             :     ScheduleHazardRecognizer::HazardType HT =
     676    16318988 :       HazardRec->getHazardType(SU, -Stalls);
     677             : 
     678             :     if (HT == ScheduleHazardRecognizer::NoHazard)
     679             :       break;
     680             : 
     681             :     ++Stalls;
     682             :   }
     683             :   AdvanceToCycle(CurCycle + Stalls);
     684    15329800 : }
     685             : 
     686    15329800 : /// Record this SUnit in the HazardRecognizer.
     687             : /// Does not update CurCycle.
     688             : void ScheduleDAGRRList::EmitNode(SUnit *SU) {
     689        9741 :   if (!HazardRec->isEnabled())
     690        9741 :     return;
     691    15320059 : 
     692             :   // Check for phys reg copy.
     693             :   if (!SU->getNode())
     694             :     return;
     695             : 
     696           0 :   switch (SU->getNode()->getOpcode()) {
     697           0 :   default:
     698           0 :     assert(SU->getNode()->isMachineOpcode() &&
     699             :            "This target-independent node should not be scheduled.");
     700             :     break;
     701           0 :   case ISD::MERGE_VALUES:
     702           0 :   case ISD::TokenFactor:
     703             :   case ISD::LIFETIME_START:
     704           0 :   case ISD::LIFETIME_END:
     705             :   case ISD::CopyToReg:
     706             :   case ISD::CopyFromReg:
     707             :   case ISD::EH_LABEL:
     708             :     // Noops don't affect the scoreboard state. Copies are likely to be
     709             :     // removed.
     710             :     return;
     711             :   case ISD::INLINEASM:
     712             :     // For inline asm, clear the pipeline state.
     713             :     HazardRec->Reset();
     714             :     return;
     715             :   }
     716             :   if (SU->isCall) {
     717             :     // Calls are scheduled with their preceding instructions. For bottom-up
     718             :     // scheduling, clear the pipeline state before emitting.
     719           0 :     HazardRec->Reset();
     720             :   }
     721           0 : 
     722           0 :   HazardRec->EmitInstruction(SU);
     723             : }
     724           0 : 
     725             : static void resetVRegCycle(SUnit *SU);
     726             : 
     727           0 : /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
     728             : /// count of its predecessors. If a predecessor pending count is zero, add it to
     729             : /// the Available queue.
     730           0 : void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
     731             :   LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
     732             :   LLVM_DEBUG(dumpNode(*SU));
     733             : 
     734             : #ifndef NDEBUG
     735             :   if (CurCycle < SU->getHeight())
     736             :     LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
     737             :                       << "] pipeline stall!\n");
     738    16318988 : #endif
     739             : 
     740             :   // FIXME: Do not modify node height. It may interfere with
     741             :   // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
     742             :   // node its ready cycle can aid heuristics, and after scheduling it can
     743             :   // indicate the scheduled cycle.
     744             :   SU->setHeightToAtLeast(CurCycle);
     745             : 
     746             :   // Reserve resources for the scheduled instruction.
     747             :   EmitNode(SU);
     748             : 
     749             :   Sequence.push_back(SU);
     750             : 
     751             :   AvailableQueue->scheduledNode(SU);
     752    16318988 : 
     753             :   // If HazardRec is disabled, and each inst counts as one cycle, then
     754             :   // advance CurCycle before ReleasePredecessors to avoid useless pushes to
     755    16318988 :   // PendingQueue for schedulers that implement HasReadyFilter.
     756             :   if (!HazardRec->isEnabled() && AvgIPC < 2)
     757    16318987 :     AdvanceToCycle(CurCycle + 1);
     758             : 
     759    16318988 :   // Update liveness of predecessors before successors to avoid treating a
     760             :   // two-address node as a live range def.
     761             :   ReleasePredecessors(SU);
     762             : 
     763             :   // Release all the implicit physical register defs that are live.
     764    16318987 :   for (SDep &Succ : SU->Succs) {
     765    16236716 :     // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
     766             :     if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
     767             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     768             :       --NumLiveRegs;
     769    16318987 :       LiveRegDefs[Succ.getReg()] = nullptr;
     770             :       LiveRegGens[Succ.getReg()] = nullptr;
     771             :       releaseInterferences(Succ.getReg());
     772    36151353 :     }
     773             :   }
     774      454348 :   // Release the special call resource dependence, if this is the beginning
     775             :   // of a call.
     776      222532 :   unsigned CallResource = TRI->getNumRegs();
     777      445064 :   if (LiveRegDefs[CallResource] == SU)
     778      222532 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     779      222532 :          SUNode = SUNode->getGluedNode()) {
     780             :       if (SUNode->isMachineOpcode() &&
     781             :           SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     782             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     783             :         --NumLiveRegs;
     784    16318987 :         LiveRegDefs[CallResource] = nullptr;
     785    32637974 :         LiveRegGens[CallResource] = nullptr;
     786     1002616 :         releaseInterferences(CallResource);
     787             :       }
     788     1002616 :     }
     789     1000984 : 
     790             :   resetVRegCycle(SU);
     791      999314 : 
     792      999314 :   SU->isScheduled = true;
     793      999314 : 
     794      999314 :   // Conditions under which the scheduler should eagerly advance the cycle:
     795             :   // (1) No available instructions
     796             :   // (2) All pipelines full, so available instructions must have hazards.
     797             :   //
     798    16318987 :   // If HazardRec is disabled, the cycle was pre-advanced before calling
     799             :   // ReleasePredecessors. In that case, IssueCount should remain 0.
     800    16318987 :   //
     801             :   // Check AvailableQueue after ReleasePredecessors in case of zero latency.
     802             :   if (HazardRec->isEnabled() || AvgIPC > 1) {
     803             :     if (SU->getNode() && SU->getNode()->isMachineOpcode())
     804             :       ++IssueCount;
     805             :     if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
     806             :         || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
     807             :       AdvanceToCycle(CurCycle + 1);
     808             :   }
     809             : }
     810    16318987 : 
     811       82272 : /// CapturePred - This does the opposite of ReleasePred. Since SU is being
     812       58164 : /// unscheduled, increase the succ left count of its predecessors. Remove
     813       82272 : /// them from AvailableQueue if necessary.
     814      159579 : void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
     815        4965 :   SUnit *PredSU = PredEdge->getSUnit();
     816             :   if (PredSU->isAvailable) {
     817    16318987 :     PredSU->isAvailable = false;
     818             :     if (!PredSU->isPending)
     819             :       AvailableQueue->remove(PredSU);
     820             :   }
     821             : 
     822           0 :   assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
     823             :          "NumSuccsLeft will overflow!");
     824           0 :   ++PredSU->NumSuccsLeft;
     825           0 : }
     826           0 : 
      827             : /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
      828             : /// own and its predecessors' states to reflect the change.
     829             : void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
     830             :   LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
     831             :   LLVM_DEBUG(dumpNode(*SU));
     832           0 : 
     833           0 :   for (SDep &Pred : SU->Preds) {
     834             :     CapturePred(&Pred);
     835             :     if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
     836             :       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     837        8840 :       assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
     838             :              "Physical register dependency violated?");
     839             :       --NumLiveRegs;
     840             :       LiveRegDefs[Pred.getReg()] = nullptr;
     841       22127 :       LiveRegGens[Pred.getReg()] = nullptr;
     842       13287 :       releaseInterferences(Pred.getReg());
     843        3876 :     }
     844             :   }
     845             : 
     846             :   // Reclaim the special call resource dependence, if this is the beginning
     847         952 :   // of a call.
     848        1904 :   unsigned CallResource = TRI->getNumRegs();
     849         952 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     850         952 :        SUNode = SUNode->getGluedNode()) {
     851             :     if (SUNode->isMachineOpcode() &&
     852             :         SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
     853             :       SUnit *SeqEnd = CallSeqEndForStart[SU];
     854             :       assert(SeqEnd && "Call sequence start/end must be known");
     855             :       assert(!LiveRegDefs[CallResource]);
     856        8840 :       assert(!LiveRegGens[CallResource]);
     857       10752 :       ++NumLiveRegs;
     858             :       LiveRegDefs[CallResource] = SU;
     859       10719 :       LiveRegGens[CallResource] = SeqEnd;
     860        8618 :     }
     861           1 :   }
     862             : 
     863             :   // Release the special call resource dependence, if this is the end
     864             :   // of a call.
     865           1 :   if (LiveRegGens[CallResource] == SU)
     866           2 :     for (const SDNode *SUNode = SU->getNode(); SUNode;
     867           1 :          SUNode = SUNode->getGluedNode()) {
     868             :       if (SUNode->isMachineOpcode() &&
     869             :           SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
     870             :         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
     871             :         assert(LiveRegDefs[CallResource]);
     872             :         assert(LiveRegGens[CallResource]);
     873       17680 :         --NumLiveRegs;
     874           0 :         LiveRegDefs[CallResource] = nullptr;
     875             :         LiveRegGens[CallResource] = nullptr;
     876           0 :         releaseInterferences(CallResource);
     877           0 :       }
     878             :     }
     879             : 
     880             :   for (auto &Succ : SU->Succs) {
     881           0 :     if (Succ.isAssignedRegDep()) {
     882           0 :       auto Reg = Succ.getReg();
     883           0 :       if (!LiveRegDefs[Reg])
     884           0 :         ++NumLiveRegs;
     885             :       // This becomes the nearest def. Note that an earlier def may still be
     886             :       // pending if this is a two-address node.
     887             :       LiveRegDefs[Reg] = SU;
     888       71536 : 
     889             :       // Update LiveRegGen only if was empty before this unscheduling.
     890             :       // This is to avoid incorrect updating LiveRegGen set in previous run.
     891        1108 :       if (!LiveRegGens[Reg]) {
     892           0 :         // Find the successor with the lowest height.
     893             :         LiveRegGens[Reg] = Succ.getSUnit();
     894             :         for (auto &Succ2 : SU->Succs) {
     895         554 :           if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
     896             :               Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
     897             :             LiveRegGens[Reg] = Succ2.getSUnit();
     898             :         }
     899         554 :       }
     900             :     }
     901           0 :   }
     902           0 :   if (SU->getHeight() < MinAvailableCycle)
     903           0 :     MinAvailableCycle = SU->getHeight();
     904           0 : 
     905           0 :   SU->setHeightDirty();
     906             :   SU->isScheduled = false;
     907             :   SU->isAvailable = true;
     908             :   if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
     909             :     // Don't make available until backtracking is complete.
     910        8840 :     SU->isPending = true;
     911        8231 :     PendingQueue.push_back(SU);
     912             :   }
     913        8840 :   else {
     914        8840 :     AvailableQueue->push(SU);
     915        8840 :   }
     916        8840 :   AvailableQueue->unscheduledNode(SU);
     917             : }
     918           0 : 
     919           0 : /// After backtracking, the hazard checker needs to be restored to a state
      920             : /// corresponding to the current cycle.
     921             : void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
     922        8840 :   HazardRec->Reset();
     923             : 
     924        8840 :   unsigned LookAhead = std::min((unsigned)Sequence.size(),
     925        8840 :                                 HazardRec->getMaxLookAhead());
     926             :   if (LookAhead == 0)
     927             :     return;
     928             : 
     929         952 :   std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
     930         952 :   unsigned HazardCycle = (*I)->getHeight();
     931             :   for (auto E = Sequence.end(); I != E; ++I) {
     932         952 :     SUnit *SU = *I;
     933         952 :     for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
     934         952 :       HazardRec->RecedeCycle();
     935             :     }
     936             :     EmitNode(SU);
     937             :   }
     938          10 : }
     939         244 : 
      940             : /// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes from the end
      941             : /// of the sequence until BtSU is reached, in order to schedule SU.
     942         154 : void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
     943             :   SUnit *OldSU = Sequence.back();
     944         234 :   while (true) {
     945             :     Sequence.pop_back();
     946             :     // FIXME: use ready cycle instead of height
     947             :     CurCycle = OldSU->getHeight();
     948             :     UnscheduleNodeBottomUp(OldSU);
     949             :     AvailableQueue->setCurCycle(CurCycle);
     950           0 :     if (OldSU == BtSU)
     951           0 :       break;
     952             :     OldSU = Sequence.back();
     953           0 :   }
     954             : 
     955           0 :   assert(!SU->isSucc(OldSU) && "Something is wrong!");
     956           0 : 
     957           0 :   RestoreHazardCheckerBottomUp();
     958           0 : 
     959             :   ReleasePending();
     960           0 : 
     961             :   ++NumBacktracks;
     962             : }
     963             : 
     964             : static bool isOperandOf(const SUnit *SU, SDNode *N) {
     965           0 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
     966             :        SUNode = SUNode->getGluedNode()) {
     967           0 :     if (SUNode->isOperandOf(N))
     968             :       return true;
     969             :   }
     970           0 :   return false;
     971             : }
     972         201 : 
      973         201 : /// TryUnfoldSU - Attempt to unfold SU's folded memory operand into separate
                      : /// load and operation nodes. Returns the operation's new SUnit, SU itself if
                      : /// unfolding would not pay off, or nullptr if the node cannot be unfolded.
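                      : ///
                      : /// Schematically, on an x86-like target a load-folded operation such as
                      : ///   ADD32rm %v, [mem]
                      : /// becomes an independent load feeding the register form:
                      : ///   %t = MOV32rm [mem];  ADD32rr %v, %t
                      : /// letting the load be scheduled separately from the arithmetic.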
     974             : SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     975         201 :   SDNode *N = SU->getNode();
      976             :   // NewNodes receives the unfolded load and operation nodes on success.
     977             :   SmallVector<SDNode *, 2> NewNodes;
     978             :   if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
     979             :     return nullptr;
     980             : 
      981             :   // Unfolding an x86 DEC64m operation results in a store, dec, and load,
      982         144 :   // which can't be handled here, so quit.
     983         144 :   if (NewNodes.size() == 3)
     984             :     return nullptr;
     985             : 
     986         144 :   assert(NewNodes.size() == 2 && "Expected a load folding node!");
     987             : 
     988             :   N = NewNodes[1];
     989             :   SDNode *LoadNode = NewNodes[0];
     990             :   unsigned NumVals = N->getNumValues();
     991         144 :   unsigned OldNumVals = SU->getNode()->getNumValues();
     992             : 
     993             :   // LoadNode may already exist. This can happen when there is another
      994             :   // load from the same location that produces the same type of value but
      995             :   // has different alignment or volatility.
     996         137 :   bool isNewLoad = true;
     997         137 :   SUnit *LoadSU;
     998         137 :   if (LoadNode->getNodeId() != -1) {
     999         137 :     LoadSU = &SUnits[LoadNode->getNodeId()];
     1000             :     // If LoadSU has already been scheduled, we would have to clone it, but
     1001             :     // that would negate the benefit of unfolding, so just return SU.
    1002             :     if (LoadSU->isScheduled)
    1003             :       return SU;
    1004             :     isNewLoad = false;
    1005             :   } else {
    1006         137 :     LoadSU = CreateNewSUnit(LoadNode);
    1007           2 :     LoadNode->setNodeId(LoadSU->NodeNum);
    1008             : 
    1009             :     InitNumRegDefsLeft(LoadSU);
    1010           2 :     computeLatency(LoadSU);
    1011             :   }
    1012             : 
    1013             :   bool isNewN = true;
    1014         135 :   SUnit *NewSU;
    1015         135 :   // This can only happen when isNewLoad is false.
    1016             :   if (N->getNodeId() != -1) {
    1017         135 :     NewSU = &SUnits[N->getNodeId()];
     1018         135 :     // If NewSU has already been scheduled, we would need to clone it, but
     1019             :     // that negates the benefit of unfolding, so just return SU.
    1020             :     if (NewSU->isScheduled)
    1021             :       return SU;
    1022             :     isNewN = false;
    1023             :   } else {
    1024         136 :     NewSU = CreateNewSUnit(N);
    1025           1 :     N->setNodeId(NewSU->NodeNum);
    1026             : 
    1027             :     const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    1028           1 :     for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
    1029             :       if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
    1030             :         NewSU->isTwoAddress = true;
    1031             :         break;
    1032         135 :       }
    1033         135 :     }
    1034             :     if (MCID.isCommutable())
    1035         270 :       NewSU->isCommutable = true;
    1036         480 : 
    1037             :     InitNumRegDefsLeft(NewSU);
    1038          60 :     computeLatency(NewSU);
    1039          60 :   }
    1040             : 
    1041             :   LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    1042         270 : 
     1043         116 :   // Now that we are committed to unfolding, replace the DAG uses.
    1044             :   for (unsigned i = 0; i != NumVals; ++i)
    1045         135 :     DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    1046         135 :   DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
    1047             :                                  SDValue(LoadNode, 1));
    1048             : 
    1049             :   // Record all the edges to and from the old SU, by category.
    1050             :   SmallVector<SDep, 4> ChainPreds;
    1051             :   SmallVector<SDep, 4> ChainSuccs;
    1052         333 :   SmallVector<SDep, 4> LoadPreds;
    1053         591 :   SmallVector<SDep, 4> NodePreds;
    1054         408 :   SmallVector<SDep, 4> NodeSuccs;
    1055             :   for (SDep &Pred : SU->Preds) {
    1056             :     if (Pred.isCtrl())
    1057             :       ChainPreds.push_back(Pred);
    1058             :     else if (isOperandOf(Pred.getSUnit(), LoadNode))
    1059             :       LoadPreds.push_back(Pred);
    1060             :     else
    1061             :       NodePreds.push_back(Pred);
    1062             :   }
    1063         447 :   for (SDep &Succ : SU->Succs) {
    1064         311 :     if (Succ.isCtrl())
    1065         110 :       ChainSuccs.push_back(Succ);
    1066         201 :     else
    1067         140 :       NodeSuccs.push_back(Succ);
    1068             :   }
    1069          61 : 
    1070             :   // Now assign edges to the newly-created nodes.
    1071         582 :   for (const SDep &Pred : ChainPreds) {
    1072         446 :     RemovePred(SU, Pred);
    1073         245 :     if (isNewLoad)
    1074             :       AddPred(LoadSU, Pred);
    1075         201 :   }
    1076             :   for (const SDep &Pred : LoadPreds) {
    1077             :     RemovePred(SU, Pred);
    1078             :     if (isNewLoad)
    1079         246 :       AddPred(LoadSU, Pred);
    1080         110 :   }
    1081         110 :   for (const SDep &Pred : NodePreds) {
    1082         110 :     RemovePred(SU, Pred);
    1083             :     AddPred(NewSU, Pred);
    1084         276 :   }
    1085         140 :   for (SDep D : NodeSuccs) {
    1086         140 :     SUnit *SuccDep = D.getSUnit();
    1087         140 :     D.setSUnit(SU);
    1088             :     RemovePred(SuccDep, D);
    1089         197 :     D.setSUnit(NewSU);
    1090          61 :     AddPred(SuccDep, D);
    1091          61 :     // Balance register pressure.
    1092             :     if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
    1093         337 :         !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
    1094             :       --NewSU->NumRegDefsLeft;
    1095             :   }
    1096         201 :   for (SDep D : ChainSuccs) {
    1097             :     SUnit *SuccDep = D.getSUnit();
    1098         201 :     D.setSUnit(SU);
    1099             :     RemovePred(SuccDep, D);
    1100         201 :     if (isNewLoad) {
    1101         201 :       D.setSUnit(LoadSU);
    1102           0 :       AddPred(SuccDep, D);
    1103             :     }
    1104         381 :   }
    1105             : 
    1106             :   // Add a data dependency to reflect that NewSU reads the value defined
    1107         245 :   // by LoadSU.
    1108         245 :   SDep D(LoadSU, SDep::Data, 0);
    1109             :   D.setLatency(LoadSU->Latency);
    1110         245 :   AddPred(NewSU, D);
    1111             : 
    1112             :   if (isNewLoad)
    1113             :     AvailableQueue->addNode(LoadSU);
    1114             :   if (isNewN)
    1115             :     AvailableQueue->addNode(NewSU);
    1116             : 
    1117         136 :   ++NumUnfolds;
    1118         136 : 
    1119             :   if (NewSU->NumSuccsLeft == 0)
    1120         136 :     NewSU->isAvailable = true;
    1121         135 : 
    1122         136 :   return NewSU;
    1123         135 : }
    1124             : 
    1125             : /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
    1126             : /// successors to the newly created node.
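                      : ///
                      : /// Already-scheduled successors are cut over to the clone while unscheduled
                      : /// ones stay on the original, duplicating the def so that a live physical
                      : /// register interference can be resolved without backtracking.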
    1127         136 : SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    1128         126 :   SDNode *N = SU->getNode();
    1129             :   if (!N)
    1130             :     return nullptr;
    1131             : 
    1132             :   LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
    1133             :   LLVM_DEBUG(dumpNode(*SU));
    1134             : 
    1135         453 :   if (N->getGluedNode() &&
    1136         453 :       !TII->canCopyGluedNodeDuringSchedule(N)) {
    1137         453 :     LLVM_DEBUG(
    1138             :         dbgs()
    1139             :         << "Giving up because it has incoming glue and the target does not "
    1140             :            "want to copy it\n");
    1141             :     return nullptr;
    1142             :   }
    1143          48 : 
    1144          48 :   SUnit *NewSU;
    1145             :   bool TryUnfold = false;
    1146             :   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    1147             :     MVT VT = N->getSimpleValueType(i);
    1148             :     if (VT == MVT::Glue) {
    1149             :       LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
    1150             :       return nullptr;
    1151             :     } else if (VT == MVT::Other)
    1152             :       TryUnfold = true;
    1153             :   }
    1154        1135 :   for (const SDValue &Op : N->op_values()) {
    1155             :     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    1156         728 :     if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
    1157             :       LLVM_DEBUG(
     1158             :           dbgs() << "Giving up because one of the operands is glue and "
    1159         728 :                     "the target does not want to copy it\n");
    1160             :       return nullptr;
    1161             :     }
    1162        1936 :   }
    1163        1529 : 
     1164        1529 :   // If possible, unfold the instruction.
    1165             :   if (TryUnfold) {
    1166             :     SUnit *UnfoldSU = TryUnfoldSU(SU);
    1167             :     if (!UnfoldSU)
    1168             :       return nullptr;
    1169             :     SU = UnfoldSU;
    1170             :     N = SU->getNode();
     1171             :     // If this can be scheduled, don't bother duplicating; just return it.
    1172             :     if (SU->NumSuccsLeft == 0)
    1173         407 :       return SU;
    1174         144 :   }
    1175         144 : 
    1176             :   LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
    1177             :   NewSU = CreateClone(SU);
    1178             : 
    1179             :   // New SUnit has the exact same predecessors.
    1180         137 :   for (SDep &Pred : SU->Preds)
    1181             :     if (!Pred.isArtificial())
    1182             :       AddPred(NewSU, Pred);
    1183             : 
    1184             :   // Only copy scheduled successors. Cut them from old node's successor
    1185         274 :   // list and move them over.
    1186             :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1187             :   for (SDep &Succ : SU->Succs) {
    1188         661 :     if (Succ.isArtificial())
    1189             :       continue;
    1190         387 :     SUnit *SuccSU = Succ.getSUnit();
    1191             :     if (SuccSU->isScheduled) {
    1192             :       SDep D = Succ;
    1193             :       D.setSUnit(NewSU);
    1194             :       AddPred(SuccSU, D);
    1195        1252 :       D.setSUnit(SU);
    1196             :       DelDeps.push_back(std::make_pair(SuccSU, D));
    1197             :     }
    1198             :   }
    1199         978 :   for (auto &DelDep : DelDeps)
    1200         415 :     RemovePred(DelDep.first, DelDep.second);
    1201             : 
    1202         415 :   AvailableQueue->updateNode(SU);
    1203             :   AvailableQueue->addNode(NewSU);
    1204         415 : 
    1205             :   ++NumDups;
    1206             :   return NewSU;
    1207         689 : }
    1208         415 : 
    1209             : /// InsertCopiesAndMoveSuccs - Insert register copies and move all
    1210         274 : /// scheduled successors of the given SUnit to the last copy.
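                      : ///
                      : /// CopyFromSU copies the value out of the physical register class SrcRC into
                      : /// DestRC, and CopyToSU copies it back; already-scheduled successors are
                      : /// rehooked onto CopyToSU, freeing the physical register in between.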
    1211         274 : void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
    1212             :                                               const TargetRegisterClass *DestRC,
    1213             :                                               const TargetRegisterClass *SrcRC,
    1214             :                                               SmallVectorImpl<SUnit*> &Copies) {
    1215             :   SUnit *CopyFromSU = CreateNewSUnit(nullptr);
    1216             :   CopyFromSU->CopySrcRC = SrcRC;
    1217             :   CopyFromSU->CopyDstRC = DestRC;
    1218             : 
    1219          53 :   SUnit *CopyToSU = CreateNewSUnit(nullptr);
    1220             :   CopyToSU->CopySrcRC = DestRC;
    1221             :   CopyToSU->CopyDstRC = SrcRC;
    1222             : 
    1223          53 :   // Only copy scheduled successors. Cut them from old node's successor
    1224          53 :   // list and move them over.
    1225          53 :   SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
    1226             :   for (SDep &Succ : SU->Succs) {
    1227          53 :     if (Succ.isArtificial())
    1228          53 :       continue;
    1229          53 :     SUnit *SuccSU = Succ.getSUnit();
    1230             :     if (SuccSU->isScheduled) {
    1231             :       SDep D = Succ;
    1232             :       D.setSUnit(CopyToSU);
    1233             :       AddPred(SuccSU, D);
    1234         236 :       DelDeps.push_back(std::make_pair(SuccSU, Succ));
    1235             :     }
    1236             :     else {
    1237             :       // Avoid scheduling the def-side copy before other successors. Otherwise
    1238         183 :       // we could introduce another physreg interference on the copy and
    1239          93 :       // continue inserting copies indefinitely.
    1240          93 :       AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    1241          93 :     }
    1242          93 :   }
    1243             :   for (auto &DelDep : DelDeps)
    1244             :     RemovePred(DelDep.first, DelDep.second);
    1245             : 
    1246             :   SDep FromDep(SU, SDep::Data, Reg);
    1247             :   FromDep.setLatency(SU->Latency);
    1248         180 :   AddPred(CopyFromSU, FromDep);
    1249             :   SDep ToDep(CopyFromSU, SDep::Data, 0);
    1250             :   ToDep.setLatency(CopyFromSU->Latency);
    1251         146 :   AddPred(CopyToSU, ToDep);
    1252          93 : 
    1253             :   AvailableQueue->updateNode(SU);
    1254             :   AvailableQueue->addNode(CopyFromSU);
    1255          53 :   AvailableQueue->addNode(CopyToSU);
    1256          53 :   Copies.push_back(CopyFromSU);
    1257          53 :   Copies.push_back(CopyToSU);
    1258          53 : 
    1259          53 :   ++NumPRCopies;
    1260             : }
    1261          53 : 
    1262          53 : /// getPhysicalRegisterVT - Returns the ValueType of the physical register
    1263          53 : /// definition of the specified node.
    1264          53 : /// FIXME: Move to SelectionDAG?
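                      : ///
                      : /// For a machine node, the result index is the number of explicit defs plus
                      : /// Reg's position in the implicit-def list; the VT of that result is
                      : /// returned.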
    1265          53 : static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
    1266             :                                  const TargetInstrInfo *TII) {
    1267             :   unsigned NumRes;
    1268          53 :   if (N->getOpcode() == ISD::CopyFromReg) {
    1269             :     // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    1270             :     NumRes = 1;
    1271             :   } else {
    1272             :     const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    1273         453 :     assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    1274             :     NumRes = MCID.getNumDefs();
    1275             :     for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    1276         453 :       if (Reg == *ImpDef)
    1277             :         break;
    1278             :       ++NumRes;
    1279             :     }
    1280         439 :   }
    1281             :   return N->getSimpleValueType(NumRes);
    1282         439 : }
    1283         441 : 
     1284             : /// CheckForLiveRegDef - Update RegAdded and LRegs if the specified register
     1285             : /// def of the specified SUnit clobbers any "live" registers.
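                      : ///
                      : /// All aliases of Reg are considered; for instance, on an x86-like target a
                      : /// def of AX would interfere with a live EAX, since MCRegAliasIterator
                      : /// enumerates overlapping registers.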
    1286           2 : static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
    1287             :                                SUnit **LiveRegDefs,
    1288             :                                SmallSet<unsigned, 4> &RegAdded,
    1289         453 :                                SmallVectorImpl<unsigned> &LRegs,
    1290             :                                const TargetRegisterInfo *TRI) {
    1291             :   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
    1292             : 
     1293             :     // Check if this alias of Reg is live.
    1294     3331846 :     if (!LiveRegDefs[*AliasI]) continue;
    1295             : 
    1296             :     // Allow multiple uses of the same def.
    1297             :     if (LiveRegDefs[*AliasI] == SU) continue;
    1298             : 
    1299    16573388 :     // Add Reg to the set of interfering live regs.
    1300             :     if (RegAdded.insert(*AliasI).second) {
    1301             :       LRegs.push_back(*AliasI);
    1302    13241542 :     }
    1303             :   }
    1304             : }
    1305      238836 : 
    1306             : /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
    1307             : /// by RegMask, and add them to LRegs.
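                      : ///
                      : /// RegMask is typically a call's register-clobber mask; every live def whose
                      : /// register is clobbered by the mask is appended to LRegs.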
    1308       11592 : static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
    1309       17996 :                                      ArrayRef<SUnit*> LiveRegDefs,
    1310             :                                      SmallSet<unsigned, 4> &RegAdded,
    1311             :                                      SmallVectorImpl<unsigned> &LRegs) {
    1312     3331846 :   // Look at all live registers. Skip Reg0 and the special CallResource.
    1313             :   for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    1314             :     if (!LiveRegDefs[i]) continue;
    1315             :     if (LiveRegDefs[i] == SU) continue;
    1316        1078 :     if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    1317             :     if (RegAdded.insert(i).second)
    1318             :       LRegs.push_back(i);
    1319             :   }
    1320             : }
    1321      310153 : 
    1322      618150 : /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
    1323          87 : static const uint32_t *getNodeRegMask(const SDNode *N) {
    1324          87 :   for (const SDValue &Op : N->op_values())
    1325          87 :     if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
    1326           8 :       return RegOp->getRegMask();
    1327             :   return nullptr;
    1328        1078 : }
    1329             : 
    1330             : /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
    1331             : /// scheduling of the given node to satisfy live physical register dependencies.
     1332    12100047 : /// If the specified node is the last one that's available to schedule, do
    1333     9591337 : /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
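                      : ///
                      : /// Interferences are gathered from assigned-register predecessors, inline
                      : /// asm register defs and clobbers, the special CallResource that guards
                      : /// nested call sequences, call register masks, optional defs (e.g. ARM's
                      : /// CPSR), and implicit defs.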
    1334        1082 : bool ScheduleDAGRRList::
    1335             : DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
    1336             :   if (NumLiveRegs == 0)
    1337             :     return false;
    1338             : 
    1339             :   SmallSet<unsigned, 4> RegAdded;
    1340             :   // If this node would clobber any "live" register, then it's not ready.
    1341             :   //
    1342    16328529 :   // If SU is the currently live definition of the same register that it uses,
    1343             :   // then we are free to schedule it.
    1344    16328529 :   for (SDep &Pred : SU->Preds) {
    1345             :     if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
    1346             :       CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
    1347     2775795 :                          RegAdded, LRegs, TRI);
    1348             :   }
    1349             : 
    1350             :   for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    1351             :     if (Node->getOpcode() == ISD::INLINEASM) {
    1352     4896872 :       // Inline asm can clobber physical defs.
    1353       20216 :       unsigned NumOps = Node->getNumOperands();
    1354       15980 :       if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
    1355             :         --NumOps;  // Ignore the glue operand.
    1356             : 
    1357             :       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    1358     2810863 :         unsigned Flags =
    1359     2810795 :           cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    1360             :         unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    1361           4 : 
    1362           8 :         ++i; // Skip the ID value.
    1363             :         if (InlineAsm::isRegDefKind(Flags) ||
    1364             :             InlineAsm::isRegDefEarlyClobberKind(Flags) ||
    1365          14 :             InlineAsm::isClobberKind(Flags)) {
    1366             :           // Check for def of register or earlyclobber register.
    1367          30 :           for (; NumVals; --NumVals, ++i) {
    1368             :             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
    1369             :             if (TargetRegisterInfo::isPhysicalRegister(Reg))
    1370          10 :               CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1371           5 :           }
    1372          15 :         } else
    1373             :           i += NumVals;
    1374             :       }
    1375          16 :       continue;
    1376          16 :     }
    1377           8 : 
    1378           6 :     if (!Node->isMachineOpcode())
    1379             :       continue;
    1380             :     // If we're in the middle of scheduling a call, don't begin scheduling
    1381           2 :     // another call. Also, don't allow any physical registers to be live across
    1382             :     // the call.
    1383             :     if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
    1384             :       // Check the special calling-sequence resource.
    1385             :       unsigned CallResource = TRI->getNumRegs();
    1386     2810791 :       if (LiveRegDefs[CallResource]) {
    1387             :         SDNode *Gen = LiveRegGens[CallResource]->getNode();
    1388             :         while (SDNode *Glued = Gen->getGluedNode())
    1389             :           Gen = Glued;
    1390             :         if (!IsChainDependent(Gen, Node, 0, TII) &&
    1391     2509784 :             RegAdded.insert(CallResource).second)
    1392             :           LRegs.push_back(CallResource);
    1393        1077 :       }
    1394        2154 :     }
    1395         989 :     if (const uint32_t *RegMask = getNodeRegMask(Node))
    1396             :       CheckForLiveRegDefMasked(SU, RegMask,
    1397             :                                makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
    1398         989 :                                RegAdded, LRegs);
    1399         988 : 
    1400         988 :     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    1401             :     if (MCID.hasOptionalDef()) {
    1402             :       // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
    1403     2509784 :       // This operand can be either a def of CPSR, if the S bit is set; or a use
    1404        1078 :       // of %noreg.  When the OptionalDef is set to a valid register, we need to
    1405        1078 :       // handle it in the same way as an ImplicitDef.
    1406             :       for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
    1407             :         if (MCID.OpInfo[i].isOptionalDef()) {
    1408     5019568 :           const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
    1409     5019568 :           unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
    1410             :           CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1411             :         }
    1412             :     }
    1413             :     if (!MCID.ImplicitDefs)
    1414       10787 :       continue;
    1415        3889 :     for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
    1416         880 :       CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
    1417         440 :   }
    1418         880 : 
    1419             :   return !LRegs.empty();
    1420             : }
    1421     2509784 : 
    1422             : void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
    1423     4670939 :   // Add the nodes that aren't ready back onto the available list.
    1424     6646826 :   for (unsigned i = Interferences.size(); i > 0; --i) {
    1425             :     SUnit *SU = Interferences[i-1];
    1426             :     LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    1427     2775795 :     if (Reg) {
    1428             :       SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
    1429             :       if (!is_contained(LRegs, Reg))
    1430     1222798 :         continue;
    1431             :     }
    1432     1232792 :     SU->isPending = false;
    1433        9994 :     // The interfering node may no longer be available due to backtracking.
    1434        9994 :     // Furthermore, it may have been made available again, in which case it is
    1435        9994 :     // now already in the AvailableQueue.
    1436             :     if (SU->isAvailable && !SU->NodeQueueId) {
    1437        9994 :       LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
    1438           2 :       AvailableQueue->push(SU);
    1439             :     }
    1440        9992 :     if (i < Interferences.size())
    1441             :       Interferences[i-1] = Interferences.back();
    1442             :     Interferences.pop_back();
    1443             :     LRegsMap.erase(LRegsPos);
    1444        9992 :   }
    1445             : }
    1446        8325 : 
    1447             : /// Return a node that can be scheduled in this cycle. Requirements:
    1448       19984 : /// (1) Ready: latency has been satisfied
    1449           0 : /// (2) No Hazards: resources are available
    1450             : /// (3) No Interferences: may unschedule to break register interferences.
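                      : ///
                      : /// When every candidate interferes, resolution is attempted in increasing
                      : /// order of cost: backtrack to the live register's generator, duplicate the
                      : /// defining node (CopyAndMoveSuccessors), or, as a last resort, insert
                      : /// cross-register-class copies (InsertCopiesAndMoveSuccs).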
    1451             : SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
    1452             :   SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
    1453     1222798 :   auto FindAvailableNode = [&]() {
    1454             :     while (CurSU) {
    1455             :       SmallVector<unsigned, 4> LRegs;
    1456             :       if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
    1457             :         break;
    1458             :       LLVM_DEBUG(dbgs() << "    Interfering reg ";
    1459    16318987 :                  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
    1460    16318987 :                  else dbgs() << printReg(LRegs[0], TRI);
    1461             :                  dbgs() << " SU #" << CurSU->NodeNum << '\n');
    1462             :       std::pair<LRegsMapT::iterator, bool> LRegsPair =
    1463             :         LRegsMap.insert(std::make_pair(CurSU, LRegs));
    1464             :       if (LRegsPair.second) {
    1465             :         CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    1466             :         Interferences.push_back(CurSU);
    1467             :       }
    1468             :       else {
    1469             :         assert(CurSU->isPending && "Interferences are pending");
    1470             :         // Update the interference with current live regs.
    1471             :         LRegsPair.first->second = LRegs;
    1472             :       }
    1473             :       CurSU = AvailableQueue->pop();
    1474             :     }
    1475             :   };
    1476             :   FindAvailableNode();
    1477             :   if (CurSU)
    1478             :     return CurSU;
    1479             : 
    1480             :   // All candidates are delayed due to live physical reg dependencies.
    1481             :   // Try backtracking, code duplication, or inserting cross class copies
    1482             :   // to resolve it.
    1483    16318988 :   for (SUnit *TrySU : Interferences) {
    1484    16318988 :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1485    16318988 : 
    1486             :     // Try unscheduling up to the point where it's safe to schedule
    1487             :     // this node.
    1488             :     SUnit *BtSU = nullptr;
    1489             :     unsigned LiveCycle = std::numeric_limits<unsigned>::max();
    1490             :     for (unsigned Reg : LRegs) {
    1491        3876 :       if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
    1492        3423 :         BtSU = LiveRegGens[Reg];
    1493             :         LiveCycle = BtSU->getHeight();
    1494             :       }
    1495             :     }
    1496             :     if (!WillCreateCycle(TrySU, BtSU))  {
    1497             :       // BacktrackBottomUp mutates Interferences!
    1498        6846 :       BacktrackBottomUp(TrySU, BtSU);
    1499        6846 : 
    1500        3423 :       // Force the current node to be scheduled before the node that
    1501             :       // requires the physical reg dep.
    1502             :       if (BtSU->isAvailable) {
    1503             :         BtSU->isAvailable = false;
    1504        3423 :         if (!BtSU->isPending)
    1505             :           AvailableQueue->remove(BtSU);
    1506         952 :       }
    1507             :       LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
    1508             :                         << ") to SU(" << TrySU->NodeNum << ")\n");
    1509             :       AddPred(TrySU, SDep(BtSU, SDep::Artificial));
    1510         952 : 
     1511         952 :       // If one or more successors have been unscheduled, then the current
    1512         952 :       // node is no longer available.
    1513         952 :       if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
    1514             :         LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
    1515             :         CurSU = AvailableQueue->pop();
    1516             :       } else {
    1517         952 :         LLVM_DEBUG(dbgs() << "TrySU available\n");
    1518             :         // Available and in AvailableQueue
    1519             :         AvailableQueue->remove(TrySU);
    1520             :         CurSU = TrySU;
    1521         952 :       }
    1522             :       FindAvailableNode();
    1523         299 :       // Interferences has been mutated. We must break.
    1524             :       break;
    1525             :     }
    1526             :   }
    1527         653 : 
    1528         653 :   if (!CurSU) {
     1529             :     // Can't backtrack. If it's too expensive to copy the value, then try to
     1530             :     // duplicate the nodes that produce these "too expensive to copy"
    1531             :     // values to break the dependency. In case even that doesn't work,
    1532         952 :     // insert cross class copies.
    1533             :     // If it's not too expensive, i.e. cost != -1, issue copies.
    1534             :     SUnit *TrySU = Interferences[0];
    1535             :     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    1536        1405 :     assert(LRegs.size() == 1 && "Can't handle this yet!");
    1537             :     unsigned Reg = LRegs[0];
    1538             :     SUnit *LRDef = LiveRegDefs[Reg];
    1539             :     MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    1540             :     const TargetRegisterClass *RC =
    1541             :       TRI->getMinimalPhysRegClass(Reg, VT);
    1542         453 :     const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
    1543         453 : 
     1544             :     // If the cross copy register class is the same as RC, then it must be
     1545         453 :     // possible to copy the value directly. Do not try to duplicate the def.
     1546             :     // If the cross copy register class is not the same as RC, then it is
     1547             :     // still possible to copy the value, but it requires cross register class
     1548             :     // copies and is expensive.
    1549         453 :     // If cross copy register class is null, then it's not possible to copy
    1550         453 :     // the value at all.
    1551             :     SUnit *NewDef = nullptr;
    1552             :     if (DestRC != RC) {
    1553             :       NewDef = CopyAndMoveSuccessors(LRDef);
    1554             :       if (!DestRC && !NewDef)
    1555             :         report_fatal_error("Can't handle live physical register dependency!");
    1556             :     }
    1557             :     if (!NewDef) {
     1558             :       // Issue copies; these can be expensive cross register class copies.
    1559             :       SmallVector<SUnit*, 2> Copies;
    1560         453 :       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
    1561         453 :       LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
    1562         453 :                         << " to SU #" << Copies.front()->NodeNum << "\n");
    1563           0 :       AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
    1564             :       NewDef = Copies.back();
    1565         453 :     }
    1566             : 
    1567             :     LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
    1568          53 :                       << " to SU #" << TrySU->NodeNum << "\n");
    1569             :     LiveRegDefs[Reg] = NewDef;
    1570             :     AddPred(NewDef, SDep(TrySU, SDep::Artificial));
    1571         106 :     TrySU->isAvailable = false;
    1572          53 :     CurSU = NewDef;
    1573             :   }
    1574             :   assert(CurSU && "Unable to resolve live physical register dependencies!");
    1575             :   return CurSU;
    1576             : }
    1577         453 : 
    1578         906 : /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
    1579         453 : /// schedulers.
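                      : ///
                      : /// Nodes are appended to Sequence in the order they are picked, i.e. in
                      : /// reverse program order, and the sequence is reversed once scheduling
                      : /// completes.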
    1580         453 : void ScheduleDAGRRList::ListScheduleBottomUp() {
    1581             :   // Release any predecessors of the special Exit node.
    1582             :   ReleasePredecessors(&ExitSU);
    1583        1405 : 
    1584             :   // Add root to Available queue.
    1585             :   if (!SUnits.empty()) {
    1586             :     SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    1587             :     assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    1588     1269002 :     RootSU->isAvailable = true;
    1589             :     AvailableQueue->push(RootSU);
    1590     1269002 :   }
    1591             : 
    1592             :   // While Available queue is not empty, grab the node with the highest
     1593             :   // priority. If it is not ready, put it back. Schedule the node.
    1594     1265025 :   Sequence.reserve(SUnits.size());
    1595             :   while (!AvailableQueue->empty() || !Interferences.empty()) {
    1596     1265025 :     LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
    1597     1265025 :                AvailableQueue->dump(this));
    1598             : 
    1599             :     // Pick the best node to schedule taking all constraints into
    1600             :     // consideration.
    1601             :     SUnit *SU = PickNodeToScheduleBottomUp();
    1602     2538004 : 
    1603    17587989 :     AdvancePastStalls(SU);
    1604             : 
    1605             :     ScheduleNodeBottomUp(SU);
    1606             : 
    1607             :     while (AvailableQueue->empty() && !PendingQueue.empty()) {
    1608             :       // Advance the cycle to free resources. Skip ahead to the next ready SU.
    1609    16318987 :       assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
    1610             :              "MinAvailableCycle uninitialized");
    1611    16318988 :       AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    1612             :     }
    1613    16318988 :   }
    1614             : 
    1615    16318987 :   // Reverse the order if it is bottom up.
    1616             :   std::reverse(Sequence.begin(), Sequence.end());
    1617             : 
    1618             : #ifndef NDEBUG
    1619           0 :   VerifyScheduledSequence(/*isBottomUp=*/true);
    1620             : #endif
    1621             : }
    1622             : 
    1623             : namespace {
    1624             : 
    1625             : class RegReductionPQBase;
    1626             : 
    1627             : struct queue_sort {
    1628             :   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
    1629     1269001 : };
    1630             : 
    1631             : #ifndef NDEBUG
    1632             : template<class SF>
    1633             : struct reverse_sort : public queue_sort {
    1634             :   SF &SortFunc;
    1635             : 
    1636             :   reverse_sort(SF &sf) : SortFunc(sf) {}
    1637             : 
    1638             :   bool operator()(SUnit* left, SUnit* right) const {
    1639             :     // reverse left/right rather than simply !SortFunc(left, right)
    1640             :     // to expose different paths in the comparison logic.
    1641             :     return SortFunc(right, left);
    1642             :   }
    1643             : };
    1644             : #endif // NDEBUG
    1645             : 
     1646             : /// bu_ls_rr_sort - Priority function for bottom-up register pressure
     1647             : /// reduction scheduler.
    1648             : struct bu_ls_rr_sort : public queue_sort {
    1649             :   enum {
    1650             :     IsBottomUp = true,
    1651             :     HasReadyFilter = false
    1652             :   };
    1653             : 
    1654             :   RegReductionPQBase *SPQ;
    1655             : 
    1656             :   bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1657             : 
    1658             :   bool operator()(SUnit* left, SUnit* right) const;
    1659             : };
    1660             : 
    1661             : // src_ls_rr_sort - Priority function for source order scheduler.
    1662             : struct src_ls_rr_sort : public queue_sort {
    1663             :   enum {
    1664        9613 :     IsBottomUp = true,
    1665             :     HasReadyFilter = false
    1666             :   };
    1667             : 
    1668             :   RegReductionPQBase *SPQ;
    1669             : 
    1670             :   src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1671             : 
    1672             :   bool operator()(SUnit* left, SUnit* right) const;
    1673             : };
    1674             : 
    1675             : // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
    1676             : struct hybrid_ls_rr_sort : public queue_sort {
    1677             :   enum {
    1678     1232168 :     IsBottomUp = true,
    1679             :     HasReadyFilter = false
    1680             :   };
    1681             : 
    1682             :   RegReductionPQBase *SPQ;
    1683             : 
    1684             :   hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1685             : 
    1686             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1687             : 
    1688             :   bool operator()(SUnit* left, SUnit* right) const;
    1689             : };
    1690             : 
    1691             : // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
    1692       11341 : // scheduler.
    1693             : struct ilp_ls_rr_sort : public queue_sort {
    1694             :   enum {
    1695             :     IsBottomUp = true,
    1696             :     HasReadyFilter = false
    1697             :   };
    1698             : 
    1699             :   RegReductionPQBase *SPQ;
    1700             : 
    1701             :   ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
    1702             : 
    1703             :   bool isReady(SUnit *SU, unsigned CurCycle) const;
    1704             : 
    1705             :   bool operator()(SUnit* left, SUnit* right) const;
    1706             : };
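                      : 
                      : // The four functors above share the same queue machinery and differ only in
                      : // their comparison logic: roughly, bu_ls_rr_sort minimizes register
                      : // pressure, src_ls_rr_sort follows source order, hybrid_ls_rr_sort balances
                      : // pressure against latency, and ilp_ls_rr_sort favors instruction-level
                      : // parallelism.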
    1707             : 
    1708             : class RegReductionPQBase : public SchedulingPriorityQueue {
    1709       15880 : protected:
    1710             :   std::vector<SUnit *> Queue;
    1711             :   unsigned CurQueueId = 0;
    1712             :   bool TracksRegPressure;
    1713             :   bool SrcOrder;
    1714             : 
    1715             :   // SUnits - The SUnits for the current graph.
    1716             :   std::vector<SUnit> *SUnits;
    1717             : 
    1718             :   MachineFunction &MF;
    1719             :   const TargetInstrInfo *TII;
    1720             :   const TargetRegisterInfo *TRI;
    1721             :   const TargetLowering *TLI;
    1722             :   ScheduleDAGRRList *scheduleDAG = nullptr;
    1723             : 
    1724             :   // SethiUllmanNumbers - The SethiUllman number for each node.
    1725             :   std::vector<unsigned> SethiUllmanNumbers;
    1726             : 
    1727             :   /// RegPressure - Tracking current reg pressure per register class.
    1728             :   std::vector<unsigned> RegPressure;
    1729             : 
    1730             :   /// RegLimit - Tracking the number of allocatable registers per register
    1731             :   /// class.
    1732             :   std::vector<unsigned> RegLimit;
    1733             : 
    1734             : public:
    1735             :   RegReductionPQBase(MachineFunction &mf,
    1736             :                      bool hasReadyFilter,
    1737             :                      bool tracksrp,
    1738             :                      bool srcorder,
    1739             :                      const TargetInstrInfo *tii,
    1740             :                      const TargetRegisterInfo *tri,
    1741             :                      const TargetLowering *tli)
    1742             :     : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
    1743     1269002 :       SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    1744             :     if (TracksRegPressure) {
    1745             :       unsigned NumRC = TRI->getNumRegClasses();
    1746             :       RegLimit.resize(NumRC);
    1747             :       RegPressure.resize(NumRC);
    1748             :       std::fill(RegLimit.begin(), RegLimit.end(), 0);
    1749             :       std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1750     1269002 :       for (const TargetRegisterClass *RC : TRI->regclasses())
    1751     5076008 :         RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    1752     1269002 :     }
    1753             :   }
    1754       27221 : 
    1755       27221 :   void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    1756             :     scheduleDAG = scheduleDag;
    1757             :   }
    1758     2474883 : 
    1759     7342986 :   ScheduleHazardRecognizer* getHazardRec() {
    1760             :     return scheduleDAG->getHazardRec();
    1761     1269002 :   }
    1762             : 
    1763           0 :   void initNodes(std::vector<SUnit> &sunits) override;
    1764     1269002 : 
    1765           0 :   void addNode(const SUnit *SU) override;
    1766             : 
    1767           0 :   void updateNode(const SUnit *SU) override;
    1768     9609188 : 
    1769             :   void releaseState() override {
    1770             :     SUnits = nullptr;
    1771             :     SethiUllmanNumbers.clear();
    1772             :     std::fill(RegPressure.begin(), RegPressure.end(), 0);
    1773             :   }
    1774             : 
    1775             :   unsigned getNodePriority(const SUnit *SU) const;
    1776             : 
    1777     1269001 :   unsigned getNodeOrdering(const SUnit *SU) const {
    1778     1269001 :     if (!SU->getNode()) return 0;
    1779             : 
    1780             :     return SU->getNode()->getIROrder();
    1781     1269001 :   }
    1782             : 
    1783             :   bool empty() const override { return Queue.empty(); }
    1784             : 
    1785           0 :   void push(SUnit *U) override {
    1786       12156 :     assert(!U->NodeQueueId && "Node in the queue already");
    1787             :     U->NodeQueueId = ++CurQueueId;
    1788       12156 :     Queue.push_back(U);
    1789             :   }
    1790             : 
    1791   133028776 :   void remove(SUnit *SU) override {
    1792             :     assert(!Queue.empty() && "Queue is empty!");
    1793    16331408 :     assert(SU->NodeQueueId != 0 && "Not in queue!");
    1794             :     std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    1795    16331408 :     if (I != std::prev(Queue.end()))
    1796    16331408 :       std::swap(*I, Queue.back());
    1797    16331408 :     Queue.pop_back();
    1798             :     SU->NodeQueueId = 0;
    1799        3533 :   }
    1800             : 
    1801             :   bool tracksRegPressure() const override { return TracksRegPressure; }
    1802             : 
    1803        3533 :   void dumpRegPressure() const;
    1804             : 
    1805             :   bool HighRegPressure(const SUnit *SU) const;
    1806        3533 : 
    1807        3533 :   bool MayReduceRegPressure(SUnit *SU) const;
    1808             : 
    1809         201 :   int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
    1810             : 
    1811             :   void scheduledNode(SUnit *SU) override;
    1812             : 
    1813             :   void unscheduledNode(SUnit *SU) override;
    1814             : 
    1815             : protected:
    1816             :   bool canClobber(const SUnit *SU, const SUnit *Op);
    1817             :   void AddPseudoTwoAddrDeps();
    1818             :   void PrescheduleNodesWithMultipleUses();
    1819             :   void CalculateSethiUllmanNumbers();
    1820             : };
    1821             : 
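                      : // popFromQueueImpl scans linearly for the best element under Picker and
                      : // removes it with a swap-to-back, keeping push O(1) at the price of an O(n)
                      : // pop; presumably the queue is not kept heap-ordered because the comparators
                      : // read mutable scheduling state, which would invalidate a heap's invariants.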
    1822             : template<class SF>
    1823             : static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
    1824             :   std::vector<SUnit *>::iterator Best = Q.begin();
    1825             :   for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    1826             :     if (Picker(*Best, *I))
    1827             :       Best = I;
    1828             :   SUnit *V = *Best;
    1829             :   if (Best != std::prev(Q.end()))
    1830             :     std::swap(*Best, Q.back());
    1831    16327875 :   Q.pop_back();
    1832             :   return V;
    1833    35725813 : }
    1834    19397937 : 
    1835             : template<class SF>
    1836    16327876 : SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
    1837    16327876 : #ifndef NDEBUG
    1838             :   if (DAG->StressSched) {
    1839             :     reverse_sort<SF> RPicker(Picker);
    1840    16327876 :     return popFromQueueImpl(Q, RPicker);
    1841             :   }
    1842       70657 : #endif
    1843             :   (void)DAG;
    1844      179211 :   return popFromQueueImpl(Q, Picker);
    1845      108554 : }
    1846             : 
    1847       70657 : //===----------------------------------------------------------------------===//
    1848       70657 : //                RegReductionPriorityQueue Definition
    1849             : //===----------------------------------------------------------------------===//
    1850             : //
    1851       70657 : // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
    1852             : // to reduce register pressure.
    1853    16046194 : //
    1854             : template<class SF>
    1855    35034690 : class RegReductionPriorityQueue : public RegReductionPQBase {
    1856    18988495 :   SF Picker;
    1857             : 
    1858    16046195 : public:
    1859    16046195 :   RegReductionPriorityQueue(MachineFunction &mf,
    1860             :                             bool tracksrp,
    1861             :                             bool srcorder,
    1862    16046195 :                             const TargetInstrInfo *tii,
    1863             :                             const TargetRegisterInfo *tri,
    1864       86880 :                             const TargetLowering *tli)
    1865             :     : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
    1866      236001 :                          tii, tri, tli),
    1867      149121 :       Picker(this) {}
    1868             : 
    1869       86880 :   bool isBottomUp() const override { return SF::IsBottomUp; }
    1870       86880 : 
    1871             :   bool isReady(SUnit *U) const override {
    1872             :     return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
    1873       86880 :   }
    1874             : 
    1875      124144 :   SUnit *pop() override {
    1876             :     if (Queue.empty()) return nullptr;
    1877      275911 : 
    1878      151767 :     SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    1879             :     V->NodeQueueId = 0;
    1880      124144 :     return V;
    1881      124144 :   }
    1882             : 
    1883             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    1884      124144 :   LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    1885             :     // Emulate pop() without clobbering NodeQueueIds.
    1886             :     std::vector<SUnit *> DumpQueue = Queue;
    1887             :     SF DumpPicker = Picker;
    1888           0 :     while (!DumpQueue.empty()) {
    1889             :       SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
    1890             :       dbgs() << "Height " << SU->getHeight() << ": ";
    1891             :       DAG->dumpNode(*SU);
    1892             :     }
    1893             :   }
    1894             : #endif
    1895             : };
    1896    16327875 : 
    1897             : using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
    1898           0 : using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
    1899             : using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
    1900             : using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
    1901             : 
    1902             : } // end anonymous namespace
    1903             : 
    1904             : //===----------------------------------------------------------------------===//
    1905             : //           Static Node Priority for Register Pressure Reduction
    1906           0 : //===----------------------------------------------------------------------===//
    1907             : 
    1908           0 : // Check for special nodes that bypass scheduling heuristics.
    1909             : // Currently this pushes TokenFactor nodes down, but may be used for other
    1910             : // pseudo-ops as well.
    1911             : //
    1912             : // Return -1 to schedule right above left, 1 for left above right.
    1913             : // Return 0 if no bias exists.
    1914             : static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
    1915             :   bool LSchedLow = left->isScheduleLow;
    1916           0 :   bool RSchedLow = right->isScheduleLow;
    1917             :   if (LSchedLow != RSchedLow)
    1918           0 :     return LSchedLow < RSchedLow ? 1 : -1;
    1919             :   return 0;
    1920             : }
    1921             : 
    1922             : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
     1923             : /// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
    1924             : static unsigned
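                      : ///
                      : /// Roughly, a node's number is the maximum over its non-chain predecessors'
                      : /// numbers, plus one for each additional predecessor tying that maximum,
                      : /// with a minimum of 1; the explicit worklist below avoids deep recursion on
                      : /// very large graphs.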
    1925             : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
    1926           0 :   if (SUNumbers[SU->NodeNum] != 0)
    1927             :     return SUNumbers[SU->NodeNum];
    1928           0 : 
    1929             :   // Use WorkList to avoid stack overflow on excessively large IRs.
    1930             :   struct WorkState {
    1931             :     WorkState(const SUnit *SU) : SU(SU) {}
    1932             :     const SUnit *SU;
    1933             :     unsigned PredsProcessed = 0;
    1934             :   };
    1935             : 
    1936           0 :   SmallVector<WorkState, 16> WorkList;
    1937             :   WorkList.push_back(SU);
    1938             :   while (!WorkList.empty()) {
    1939             :     auto &Temp = WorkList.back();
    1940             :     auto *TempSU = Temp.SU;
    1941             :     bool AllPredsKnown = true;
    1942             :     // Try to find a non-evaluated pred and push it into the processing stack.
    1943             :     for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
    1944             :       auto &Pred = TempSU->Preds[P];
    1945             :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1946             :       SUnit *PredSU = Pred.getSUnit();
    1947             :       if (SUNumbers[PredSU->NodeNum] == 0) {
    1948             : #ifndef NDEBUG
    1949             :         // In debug mode, check that we don't have such element in the stack.
     1950             :         // In debug mode, check that we don't have such an element in the stack.
    1951     1269002 :           assert(It.SU != PredSU && "Trying to push an element twice?");
    1952             : #endif
    1953             :         // Next time start processing this one starting from the next pred.
     1954             :         // Next time, start processing this one from the next pred.
    1955             :         WorkList.push_back(PredSU);
    1956             :         AllPredsKnown = false;
    1957             :         break;
    1958             :       }
    1959     2538004 :     }
    1960             : 
    1961           0 :     if (!AllPredsKnown)
    1962             :       continue;
    1963           0 : 
    1964           0 :     // Once all preds are known, we can calculate the answer for this one.
    1965             :     unsigned SethiUllmanNumber = 0;
    1966           0 :     unsigned Extra = 0;
    1967           0 :     for (const SDep &Pred : TempSU->Preds) {
    1968             :       if (Pred.isCtrl()) continue;  // ignore chain preds
    1969           0 :       SUnit *PredSU = Pred.getSUnit();
    1970           0 :       unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
    1971             :       assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
    1972           0 :       if (PredSethiUllman > SethiUllmanNumber) {
    1973           0 :         SethiUllmanNumber = PredSethiUllman;
    1974             :         Extra = 0;
    1975           0 :       } else if (PredSethiUllman == SethiUllmanNumber)
    1976           0 :         ++Extra;
    1977             :     }
    1978             : 
    1979    16328570 :     SethiUllmanNumber += Extra;
    1980    16328570 :     if (SethiUllmanNumber == 0)
    1981             :       SethiUllmanNumber = 1;
    1982    16327875 :     SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
    1983    16327876 :     WorkList.pop_back();
    1984    16327876 :   }
    1985             : 
    1986       70679 :   assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
    1987       70679 :   return SUNumbers[SU->NodeNum];
    1988             : }
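                     : 
                     : // Editor's note: the labeling rule above is the classic Sethi-Ullman
                     : // scheme: a node's number is the maximum of its data predecessors'
                     : // numbers, plus one for each additional predecessor that ties that
                     : // maximum; leaves get 1. Below is a minimal recursive sketch of the same
                     : // rule on a hypothetical ToyNode type (illustration only, not LLVM API;
                     : // it reuses std::vector, which this file already uses, and the real code
                     : // above uses an explicit worklist instead of recursion):
                     : namespace toy_su_example {
                     : struct ToyNode {
                     :   std::vector<const ToyNode *> Preds; // data predecessors only
                     : };
                     : 
                     : unsigned sethiUllman(const ToyNode *N) {
                     :   unsigned Max = 0, Extra = 0;
                     :   for (const ToyNode *P : N->Preds) {
                     :     unsigned PNum = sethiUllman(P); // plain recursion, for brevity only
                     :     if (PNum > Max) {
                     :       Max = PNum;
                     :       Extra = 0;
                     :     } else if (PNum == Max)
                     :       ++Extra;
                     :   }
                     :   unsigned Num = Max + Extra;
                     :   return Num ? Num : 1; // leaves get 1
                     : }
                     : } // namespace toy_su_example
                     : // E.g. two leaves (both 1) feeding one node give max(1,1) plus one tie,
                     : // i.e. 2: whichever operand is evaluated first still holds one register.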
    1989       70657 : 
    1990       70657 : /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
    1991       70657 : /// scheduling units.
    1992             : void RegReductionPQBase::CalculateSethiUllmanNumbers() {
    1993    16046843 :   SethiUllmanNumbers.assign(SUnits->size(), 0);
    1994    16046843 : 
    1995             :   for (const SUnit &SU : *SUnits)
    1996    16046194 :     CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
    1997    16046195 : }
    1998    16046195 : 
    1999             : void RegReductionPQBase::addNode(const SUnit *SU) {
    2000       86894 :   unsigned SUSize = SethiUllmanNumbers.size();
    2001       86894 :   if (SUnits->size() > SUSize)
    2002             :     SethiUllmanNumbers.resize(SUSize*2, 0);
    2003       86880 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    2004       86880 : }
    2005       86880 : 
    2006             : void RegReductionPQBase::updateNode(const SUnit *SU) {
    2007      124154 :   SethiUllmanNumbers[SU->NodeNum] = 0;
    2008      124154 :   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    2009             : }
    2010      124144 : 
    2011      124144 : // Lower priority means schedule further down. For bottom-up scheduling, lower
    2012      124144 : // priority SUs are scheduled before higher priority SUs.
    2013             : unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
    2014             :   assert(SU->NodeNum < SethiUllmanNumbers.size());
    2015             :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2016             :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2017             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2018             :     // avoid spilling.
    2019             :     return 0;
    2020             :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2021             :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2022             :       Opc == TargetOpcode::INSERT_SUBREG)
    2023             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2024             :     // close to their uses to facilitate coalescing.
    2025             :     return 0;
    2026             :   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    2027             :     // If SU does not have a register use, i.e. it doesn't produce a value
    2028             :     // that would be consumed (e.g. store), then it terminates a chain of
    2029             :     // computation.  Give it a large SethiUllman number so that it is
    2030             :     // scheduled right before its predecessors and does not lengthen
    2031             :     // their live ranges.
    2032             :     return 0xffff;
    2033             :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2034             :     // If SU does not have a register def, schedule it close to its uses
    2035             :     // because it does not lengthen any live ranges.
    2036             :     return 0;
    2037             : #if 1
    2038             :   return SethiUllmanNumbers[SU->NodeNum];
    2039             : #else
    2040             :   unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
    2041             :   if (SU->isCallOp) {
    2042             :     // FIXME: This assumes all of the defs are used as call operands.
    2043             :     int NP = (int)Priority - SU->getNode()->getNumValues();
    2044             :     return (NP > 0) ? NP : 0;
    2045             :   }
    2046             :   return Priority;
    2047      108554 : #endif
    2048      108554 : }
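                     : 
                     : // Editor's note (worked example on a hypothetical DAG): under the rules
                     : // above, a store terminating a computation chain (NumSuccs == 0,
                     : // NumPreds != 0) gets 0xffff so it stays next to the chain that feeds
                     : // it; a node with no data predecessors (NumPreds == 0, NumSuccs != 0),
                     : // e.g. a constant materialization, gets 0 so it stays next to its uses,
                     : // as do TokenFactor, CopyToReg, and the subreg pseudo-ops; every other
                     : // node competes by its Sethi-Ullman number.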
    2049      108554 : 
    2050         352 : //===----------------------------------------------------------------------===//
    2051             : //                     Register Pressure Tracking
    2052             : //===----------------------------------------------------------------------===//
    2053             : 
    2054             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2055             : LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
    2056             :   for (const TargetRegisterClass *RC : TRI->regclasses()) {
    2057    16310610 :     unsigned Id = RC->getID();
    2058    32621220 :     unsigned RP = RegPressure[Id];
    2059             :     if (!RP) continue;
    2060             :     LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
    2061             :                       << RegLimit[Id] << '\n');
    2062             :   }
    2063    16310611 : }
    2064             : #endif
    2065             : 
    2066             : bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
    2067             :   if (!TLI)
    2068     9046265 :     return false;
    2069     9046265 : 
    2070    32621222 :   for (const SDep &Pred : SU->Preds) {
    2071             :     if (Pred.isCtrl())
    2072    23574957 :       continue;
    2073             :     SUnit *PredSU = Pred.getSUnit();
    2074             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2075    36079476 :     // to cover the number of registers defined (they are all live).
    2076             :     if (PredSU->NumRegDefsLeft == 0) {
    2077    19768865 :       continue;
    2078             :     }
    2079    17778940 :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2080             :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2081             :       unsigned RCId, Cost;
    2082             :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2083             : 
    2084             :       if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
    2085             :         return true;
    2086     7264346 :     }
    2087     7264346 :   }
    2088             :   return false;
    2089             : }
    2090             : 
    2091             : bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
    2092             :   const SDNode *N = SU->getNode();
    2093             : 
    2094     7264346 :   if (!N->isMachineOpcode() || !SU->NumSuccs)
    2095             :     return false;
    2096             : 
    2097             :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2098             :   for (unsigned i = 0; i != NumDefs; ++i) {
    2099    36079476 :     MVT VT = N->getSimpleValueType(i);
    2100    19768865 :     if (!N->hasAnyUseOfValue(i))
    2101             :       continue;
    2102     8889470 :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2103             :     if (RegPressure[RCId] >= RegLimit[RCId])
    2104     8889470 :       return true;
    2105             :   }
    2106             :   return false;
    2107     2088706 : }
    2108     1920511 : 
    2109             : // Compute the register pressure contribution by this instruction by counting up
    2110             : // for uses that are not live and down for defs. Only count register classes
    2111    16310611 : // that are already under high pressure. As a side effect, compute the number of
    2112    16310611 : // uses of registers that are already live.
    2113             : //
    2114    32621222 : // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
    2115             : // so the three could probably be factored.
    2116             : int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
    2117             :   LiveUses = 0;
    2118             :   int PDiff = 0;
    2119    18092530 :   for (const SDep &Pred : SU->Preds) {
    2120             :     if (Pred.isCtrl())
    2121             :       continue;
    2122             :     SUnit *PredSU = Pred.getSUnit();
    2123             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2124     1269002 :     // to cover the number of registers defined (they are all live).
    2125     1269002 :     if (PredSU->NumRegDefsLeft == 0) {
    2126             :       if (PredSU->getNode()->isMachineOpcode())
    2127    17578636 :         ++LiveUses;
    2128    16309634 :       continue;
    2129     1269002 :     }
    2130             :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2131         650 :          RegDefPos.IsValid(); RegDefPos.Advance()) {
    2132         650 :       MVT VT = RegDefPos.GetValue();
    2133        1300 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2134         295 :       if (RegPressure[RCId] >= RegLimit[RCId])
    2135         650 :         ++PDiff;
    2136         650 :     }
    2137             :   }
    2138         327 :   const SDNode *N = SU->getNode();
    2139         327 : 
    2140         327 :   if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    2141         327 :     return PDiff;
    2142             : 
    2143             :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2144             :   for (unsigned i = 0; i != NumDefs; ++i) {
    2145    20103644 :     MVT VT = N->getSimpleValueType(i);
    2146             :     if (!N->hasAnyUseOfValue(i))
    2147    20103644 :       continue;
    2148    20103644 :     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2149             :     if (RegPressure[RCId] >= RegLimit[RCId])
    2150             :       --PDiff;
    2151             :   }
    2152    36218672 :   return PDiff;
    2153    18109336 : }
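                     : 
                     : // Editor's note (worked numeric example, hypothetical values): suppose SU
                     : // has two data preds whose defs are still outstanding in a class where
                     : // RegPressure[RCId] >= RegLimit[RCId], and SU itself defines one used
                     : // register in that same over-pressured class. The loops above give
                     : // PDiff = +2 - 1 = +1 (up for the two not-yet-live uses, down for SU's
                     : // own def in the pressured class). LiveUses counts only preds whose defs
                     : // are already fully covered by scheduled uses (NumRegDefsLeft == 0).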
    2154             : 
    2155             : void RegReductionPQBase::scheduledNode(SUnit *SU) {
    2156             :   if (!TracksRegPressure)
    2157             :     return;
    2158    18109336 : 
    2159             :   if (!SU->getNode())
    2160             :     return;
    2161             : 
    2162             :   for (const SDep &Pred : SU->Preds) {
    2163             :     if (Pred.isCtrl())
    2164             :       continue;
    2165    14774422 :     SUnit *PredSU = Pred.getSUnit();
    2166             :     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    2167             :     // to cover the number of registers defined (they are all live).
    2168             :     if (PredSU->NumRegDefsLeft == 0) {
    2169             :       continue;
    2170    22019426 :     }
    2171             :     // FIXME: The ScheduleDAG currently loses information about which of a
    2172             :     // node's values is consumed by each dependence. Consequently, if the node
    2173             :     // defines multiple register classes, we don't know which to pressurize
    2174             :     // here. Instead the following loop consumes the register defs in an
    2175             :     // arbitrary order. At least it handles the common case of clustered loads
    2176             :     // to the same class. For precise liveness, each SDep needs to indicate the
    2177             :     // result number. But that tightly couples the ScheduleDAG with the
    2178             :     // SelectionDAG making updates tricky. A simpler hack would be to attach a
    2179             :     // value type or register class to SDep.
    2180             :     //
    2181             :     // The most important aspect of register tracking is balancing the increase
    2182             :     // here with the reduction further below. Note that this SU may use multiple
    2183             :     // defs in PredSU. They can't be determined here, but we've already
    2184             :     // compensated by reducing NumRegDefsLeft in PredSU during
    2185             :     // ScheduleDAGSDNodes::AddSchedEdges.
    2186             :     --PredSU->NumRegDefsLeft;
    2187             :     unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    2188             :     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
    2189             :          RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2190             :       if (SkipRegDefs)
    2191             :         continue;
    2192             : 
    2193             :       unsigned RCId, Cost;
    2194             :       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2195             :       RegPressure[RCId] += Cost;
    2196             :       break;
    2197             :     }
    2198      292004 :   }
    2199      292004 : 
    2200             :   // We should have this assert, but there may be dead SDNodes that never
    2201             :   // materialize as SUnits, so they don't appear to generate liveness.
    2202      609177 :   //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
    2203      337297 :   int SkipRegDefs = (int)SU->NumRegDefsLeft;
    2204             :   for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
    2205             :        RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    2206             :     if (SkipRegDefs > 0)
    2207             :       continue;
    2208      321333 :     unsigned RCId, Cost;
    2209             :     GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    2210             :     if (RegPressure[RCId] < Cost) {
    2211      369594 :       // Register pressure tracking is imprecise. This can happen. But we try
    2212      556868 :       // hard not to let it happen because it likely results in poor scheduling.
    2213             :       LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum
    2214      207398 :                         << ") has too many regdefs\n");
    2215             :       RegPressure[RCId] = 0;
    2216      622194 :     }
    2217       20124 :     else {
    2218             :       RegPressure[RCId] -= Cost;
    2219             :     }
    2220             :   }
    2221             :   LLVM_DEBUG(dumpRegPressure());
    2222             : }
    2223             : 
    2224             : void RegReductionPQBase::unscheduledNode(SUnit *SU) {
    2225             :   if (!TracksRegPressure)
    2226             :     return;
    2227             : 
    2228             :   const SDNode *N = SU->getNode();
    2229             :   if (!N) return;
    2230             : 
    2231             :   if (!N->isMachineOpcode()) {
    2232             :     if (N->getOpcode() != ISD::CopyToReg)
    2233             :       return;
    2234             :   } else {
    2235             :     unsigned Opc = N->getMachineOpcode();
    2236             :     if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2237             :         Opc == TargetOpcode::INSERT_SUBREG ||
    2238             :         Opc == TargetOpcode::SUBREG_TO_REG ||
    2239             :         Opc == TargetOpcode::REG_SEQUENCE ||
    2240             :         Opc == TargetOpcode::IMPLICIT_DEF)
    2241             :       return;
    2242             :   }
    2243             : 
    2244             :   for (const SDep &Pred : SU->Preds) {
    2245             :     if (Pred.isCtrl())
    2246             :       continue;
    2247             :     SUnit *PredSU = Pred.getSUnit();
    2248      292100 :     // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    2249      292100 :     // counts data deps.
    2250             :     if (PredSU->NumSuccsLeft != PredSU->Succs.size())
    2251      598843 :       continue;
    2252      306743 :     const SDNode *PN = PredSU->getNode();
    2253             :     if (!PN->isMachineOpcode()) {
    2254             :       if (PN->getOpcode() == ISD::CopyFromReg) {
    2255             :         MVT VT = PN->getSimpleValueType(0);
    2256             :         unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2257      281889 :         RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2258       67958 :       }
    2259       56278 :       continue;
    2260       67958 :     }
    2261             :     unsigned POpc = PN->getMachineOpcode();
    2262      427973 :     if (POpc == TargetOpcode::IMPLICIT_DEF)
    2263      642015 :       continue;
    2264      214042 :     if (POpc == TargetOpcode::EXTRACT_SUBREG ||
    2265      214042 :         POpc == TargetOpcode::INSERT_SUBREG ||
    2266      642126 :         POpc == TargetOpcode::SUBREG_TO_REG) {
    2267       80982 :       MVT VT = PN->getSimpleValueType(0);
    2268             :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2269             :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2270      292100 :       continue;
    2271             :     }
    2272      292100 :     unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    2273             :     for (unsigned i = 0; i != NumDefs; ++i) {
    2274             :       MVT VT = PN->getSimpleValueType(i);
    2275      385509 :       if (!PN->hasAnyUseOfValue(i))
    2276      256676 :         continue;
    2277      128173 :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2278      128173 :       if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
    2279           6 :         // Register pressure tracking is imprecise. This can happen.
    2280      128167 :         RegPressure[RCId] = 0;
    2281      384501 :       else
    2282       24695 :         RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    2283             :     }
    2284             :   }
    2285             : 
    2286             :   // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    2287    16318987 :   // may transfer data dependencies to CopyToReg.
    2288    16318987 :   if (SU->NumSuccs && N->isMachineOpcode()) {
    2289             :     unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2290             :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2291      210746 :       MVT VT = N->getSimpleValueType(i);
    2292             :       if (VT == MVT::Glue || VT == MVT::Other)
    2293             :         continue;
    2294      439925 :       if (!N->hasAnyUseOfValue(i))
    2295      229181 :         continue;
    2296             :       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    2297             :       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    2298             :     }
    2299             :   }
    2300      163763 : 
    2301             :   LLVM_DEBUG(dumpRegPressure());
    2302             : }
    2303             : 
    2304             : //===----------------------------------------------------------------------===//
    2305             : //           Dynamic Node Priority for Register Pressure Reduction
    2306             : //===----------------------------------------------------------------------===//
    2307             : 
    2308             : /// closestSucc - Returns the scheduled cycle of the successor which is
    2309             : /// closest to the current cycle.
    2310             : static unsigned closestSucc(const SUnit *SU) {
    2311             :   unsigned MaxHeight = 0;
    2312             :   for (const SDep &Succ : SU->Succs) {
    2313             :     if (Succ.isCtrl()) continue;  // ignore chain succs
    2314             :     unsigned Height = Succ.getSUnit()->getHeight();
    2315             :     // If there are a bunch of CopyToRegs stacked up, they should be considered
    2316             :     // to be at the same position.
    2317             :     if (Succ.getSUnit()->getNode() &&
    2318      139910 :         Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
    2319      139910 :       Height = closestSucc(Succ.getSUnit())+1;
    2320      140376 :     if (Height > MaxHeight)
    2321      140376 :       MaxHeight = Height;
    2322      140376 :   }
    2323         466 :   return MaxHeight;
    2324             : }
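                     : 
                     : // Editor's note (worked example, hypothetical heights): for an SU whose
                     : // data successors have heights {2, 5}, closestSucc returns 5. If a
                     : // successor is a CopyToReg, its own closest successor plus one is used
                     : // instead of its height, so a stack of CopyToRegs is treated as sitting
                     : // at a single position rather than as an ever-taller chain.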
    2325             : 
    2326             : /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
    2327             : /// for scratch registers, i.e. the number of data dependencies.
    2328      139910 : static unsigned calcMaxScratches(const SUnit *SU) {
    2329             :   unsigned Scratches = 0;
    2330             :   for (const SDep &Pred : SU->Preds) {
    2331             :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2332             :     Scratches++;
    2333             :   }
    2334             :   return Scratches;
    2335      210744 : }
    2336      354561 : 
    2337      498378 : /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
    2338      143817 : /// CopyFromReg from a virtual register.
    2339           5 : static bool hasOnlyLiveInOpers(const SUnit *SU) {
    2340             :   bool RetVal = false;
    2341      143812 :   for (const SDep &Pred : SU->Preds) {
    2342      287624 :     if (Pred.isCtrl()) continue;
    2343             :     const SUnit *PredSU = Pred.getSUnit();
    2344             :     if (PredSU->getNode() &&
    2345             :         PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
    2346             :       unsigned Reg =
    2347        3941 :         cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
    2348             :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2349             :         RetVal = true;
    2350      139871 :         continue;
    2351             :       }
    2352             :     }
    2353             :     return false;
    2354             :   }
    2355             :   return RetVal;
    2356        8840 : }
    2357        8840 : 
    2358             : /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
    2359             : /// CopyToReg to a virtual register. This SU def is probably a liveout and
    2360         156 : /// it has no other use. It should be scheduled closer to the terminator.
    2361         156 : static bool hasOnlyLiveOutUses(const SUnit *SU) {
    2362             :   bool RetVal = false;
    2363         156 :   for (const SDep &Succ : SU->Succs) {
    2364          38 :     if (Succ.isCtrl()) continue;
    2365             :     const SUnit *SuccSU = Succ.getSUnit();
    2366             :     if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
    2367             :       unsigned Reg =
    2368         236 :         cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
    2369         118 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    2370         118 :         RetVal = true;
    2371         216 :         continue;
    2372         108 :       }
    2373             :     }
    2374             :     return false;
    2375             :   }
    2376         418 :   return RetVal;
    2377         301 : }
    2378             : 
    2379             : // Set isVRegCycle for a node with only live in opers and live out uses. Also
    2380             : // set isVRegCycle for its CopyFromReg operands.
    2381             : //
    2382         253 : // This is only relevant for single-block loops, in which case the VRegCycle
    2383             : // node is likely an induction variable in which the operand and target virtual
    2384         150 : // registers should be coalesced (e.g. pre/post increment values). Setting the
    2385         150 : // isVRegCycle flag helps the scheduler prioritize other uses of the same
    2386          24 : // CopyFromReg so that this node becomes the virtual register "kill". This
    2387          24 : // avoids interference between the values live in and out of the block and
    2388          24 : // eliminates a copy inside the loop.
    2389          24 : static void initVRegCycle(SUnit *SU) {
    2390             :   if (DisableSchedVRegCycle)
    2391          24 :     return;
    2392             : 
    2393             :   if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    2394         126 :     return;
    2395             : 
    2396         252 :   LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
    2397         126 : 
    2398         126 :   SU->isVRegCycle = true;
    2399           2 : 
    2400           2 :   for (const SDep &Pred : SU->Preds) {
    2401           2 :     if (Pred.isCtrl()) continue;
    2402             :     Pred.getSUnit()->isVRegCycle = true;
    2403             :   }
    2404         248 : }
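                     : 
                     : // Editor's note (hypothetical single-block loop): for an induction
                     : // variable increment along the lines of
                     : //   t = CopyFromReg %v0;  t2 = ADD t, /*imm*/ 1;  CopyToReg %v1, t2
                     : // the ADD's only value operand is live in and its only value use is live
                     : // out, so the ADD and its CopyFromReg operand are flagged isVRegCycle;
                     : // other uses of the same CopyFromReg are then preferred so the increment
                     : // becomes the virtual register "kill" (see hasVRegCycleUse below).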
    2405         234 : 
    2406         110 : // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
    2407         110 : // CopyFromReg operands. We should no longer penalize other uses of this VReg.
    2408           0 : static void resetVRegCycle(SUnit *SU) {
    2409         110 :   if (!SU->isVRegCycle)
    2410         220 :     return;
    2411             : 
    2412          84 :   for (const SDep &Pred : SU->Preds) {
    2413             :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2414          68 :     SUnit *PredSU = Pred.getSUnit();
    2415             :     if (PredSU->isVRegCycle) {
    2416             :       assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
    2417             :              "VRegCycle def must be CopyFromReg");
    2418             :       Pred.getSUnit()->isVRegCycle = false;
    2419             :     }
    2420         117 :   }
    2421         204 : }
    2422          88 : 
    2423             : // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
    2424          20 : // means a node that defines the VRegCycle has not been scheduled yet.
    2425             : static bool hasVRegCycleUse(const SUnit *SU) {
    2426          20 :   // If this SU also defines the VReg, don't hoist it as a "use".
    2427             :   if (SU->isVRegCycle)
    2428           3 :     return false;
    2429           3 : 
    2430             :   for (const SDep &Pred : SU->Preds) {
    2431             :     if (Pred.isCtrl()) continue;  // ignore chain preds
    2432             :     if (Pred.getSUnit()->isVRegCycle &&
    2433             :         Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
    2434             :       LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
    2435             :       return true;
    2436             :     }
    2437             :   }
    2438             :   return false;
    2439             : }
    2440             : 
    2441             : // Check for either a dependence (latency) or resource (hazard) stall.
    2442    16715603 : //
    2443             : // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
    2444    36765604 : static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
    2445    20050001 :   if ((int)SPQ->getCurCycle() < Height) return true;
    2446             :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2447             :       != ScheduleHazardRecognizer::NoHazard)
    2448             :     return true;
    2449    14435208 :   return false;
    2450    14435208 : }
    2451      226951 : 
    2452    14435208 : // Return -1 if left has higher priority, 1 if right has higher priority.
    2453             : // Return 0 if latency-based priority is equivalent.
    2454             : static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
    2455    16715603 :                             RegReductionPQBase *SPQ) {
    2456             :   // Scheduling an instruction that uses a VReg whose postincrement has not yet
    2457             :   // been scheduled will induce a copy. Model this as an extra cycle of latency.
    2458             :   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
    2459             :   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
    2460             :   int LHeight = (int)left->getHeight() + LPenalty;
    2461             :   int RHeight = (int)right->getHeight() + RPenalty;
    2462    16642518 : 
    2463     9768626 :   bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    2464     7491295 :     BUHasStall(left, LHeight, SPQ);
    2465             :   bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    2466             :     BUHasStall(right, RHeight, SPQ);
    2467             : 
    2468             :   // If scheduling one of the nodes will cause a pipeline stall, delay it.
    2469             :   // If scheduling both of the nodes will cause pipeline stalls, sort them
    2470             :   // according to their height.
    2471       82971 :   if (LStall) {
    2472             :     if (!RStall)
    2473      137869 :       return 1;
    2474       92317 :     if (LHeight != RHeight)
    2475             :       return LHeight > RHeight ? 1 : -1;
    2476       57898 :   } else if (RStall)
    2477       57898 :     return -1;
    2478             : 
    2479       49428 :   // If either node is scheduling for latency, sort them by height/depth
    2480       24714 :   // and latency.
    2481             :   if (!checkPref || (left->SchedulingPref == Sched::ILP ||
    2482             :                      right->SchedulingPref == Sched::ILP)) {
    2483             :     // If neither instruction stalls (!LStall && !RStall) and the
    2484             :     // HazardRecognizer is enabled, it groups instructions by cycle, so
    2485             :     // height is already covered and only depth matters. We also reach this
    2486             :     // point if both stall but have the same height.
    2487             :     if (!SPQ->getHazardRec()->isEnabled()) {
    2488             :       if (LHeight != RHeight)
    2489             :         return LHeight > RHeight ? 1 : -1;
    2490             :     }
    2491             :     int LDepth = left->getDepth() - LPenalty;
    2492             :     int RDepth = right->getDepth() - RPenalty;
    2493        9726 :     if (LDepth != RDepth) {
    2494             :       LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
    2495       13453 :                         << ") depth " << LDepth << " vs SU (" << right->NodeNum
    2496       11377 :                         << ") depth " << RDepth << "\n");
    2497             :       return LDepth < RDepth ? 1 : -1;
    2498        9349 :     }
    2499             :     if (left->Latency != right->Latency)
    2500        3408 :       return left->Latency > right->Latency ? 1 : -1;
    2501        1704 :   }
    2502             :   return 0;
    2503             : }
    2504             : 
    2505             : static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
    2506             :   // Schedule physical register definitions close to their use. This is
    2507             :   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
    2508             :   // long as shortening physreg live ranges is generally good, we can defer
    2509             :   // creating a subtarget hook.
    2510             :   if (!DisableSchedPhysRegJoin) {
    2511             :     bool LHasPhysReg = left->hasPhysRegDefs;
    2512             :     bool RHasPhysReg = right->hasPhysRegDefs;
    2513             :     if (LHasPhysReg != RHasPhysReg) {
    2514             :       #ifndef NDEBUG
    2515             :       static const char *const PhysRegMsg[] = { " has no physreg",
    2516             :                                                 " defines a physreg" };
    2517             :       #endif
    2518             :       LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
    2519             :                         << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
    2520             :                         << ") " << PhysRegMsg[RHasPhysReg] << "\n");
    2521       82971 :       return LHasPhysReg < RHasPhysReg;
    2522       82971 :     }
    2523             :   }
    2524             : 
    2525             :   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
    2526             :   unsigned LPriority = SPQ->getNodePriority(left);
    2527             :   unsigned RPriority = SPQ->getNodePriority(right);
    2528             : 
    2529             :   // Be really careful about hoisting call operands above previous calls.
    2530             :   // Only allow it if it would reduce register pressure.
    2531             :   if (left->isCall && right->isCallOp) {
    2532        2704 :     unsigned RNumVals = right->getNode()->getNumValues();
    2533        1470 :     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
    2534        1447 :   }
    2535             :   if (right->isCall && left->isCallOp) {
    2536             :     unsigned LNumVals = left->getNode()->getNumValues();
    2537             :     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
    2538             :   }
    2539             : 
    2540    16318987 :   if (LPriority != RPriority)
    2541    16318987 :     return LPriority > RPriority;
    2542             : 
    2543             :   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
    2544             :   // the same, keep source order.
    2545        1470 :   if (left->isCall || right->isCall) {
    2546             :     unsigned LOrder = SPQ->getNodeOrdering(left);
    2547        1447 :     unsigned ROrder = SPQ->getNodeOrdering(right);
    2548             : 
    2549             :     // Prefer an ordering where the lower the non-zero order number, the higher
    2550        1405 :     // the preference.
    2551             :     if ((LOrder || ROrder) && LOrder != ROrder)
    2552             :       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2553             :   }
    2554             : 
    2555             :   // Try to schedule def + use closer when the Sethi-Ullman numbers are the same.
    2556             :   // e.g.
    2557     6647702 :   // t1 = op t2, c1
    2558             :   // t3 = op t4, c2
    2559     6647702 :   //
    2560             :   // and the following instructions are both ready.
    2561             :   // t2 = op c3
    2562    15398135 :   // t4 = op c4
    2563     8750730 :   //
    2564     6897747 :   // Then schedule t2 = op first.
    2565         904 :   // i.e.
    2566             :   // t4 = op c4
    2567             :   // t2 = op c3
    2568             :   // t1 = op t2, c1
    2569             :   // t3 = op t4, c2
    2570             :   //
    2571             :   // This creates more short live intervals.
    2572             :   unsigned LDist = closestSucc(left);
    2573             :   unsigned RDist = closestSucc(right);
    2574             :   if (LDist != RDist)
    2575             :     return LDist < RDist;
    2576             : 
    2577             :   // How many registers become live when the node is scheduled.
    2578     6399954 :   unsigned LScratch = calcMaxScratches(left);
    2579             :   unsigned RScratch = calcMaxScratches(right);
    2580             :   if (LScratch != RScratch)
    2581             :     return LScratch > RScratch;
    2582             : 
    2583             :   // Comparing latency against a call makes little sense unless the node
    2584             :   // is register pressure-neutral.
    2585             :   if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    2586     3323851 :     return (left->NodeQueueId > right->NodeQueueId);
    2587             : 
    2588             :   // Do not compare latencies when one or both of the nodes are calls.
    2589             :   if (!DisableSchedCycles &&
    2590     3323851 :       !(left->isCall || right->isCall)) {
    2591     3323851 :     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    2592     3323851 :     if (result != 0)
    2593     3323851 :       return result > 0;
    2594             :   }
    2595     3323851 :   else {
    2596             :     if (left->getHeight() != right->getHeight())
    2597     3323851 :       return left->getHeight() > right->getHeight();
    2598             : 
    2599             :     if (left->getDepth() != right->getDepth())
    2600             :       return left->getDepth() < right->getDepth();
    2601             :   }
    2602             : 
    2603     3323851 :   assert(left->NodeQueueId && right->NodeQueueId &&
    2604       43501 :          "NodeQueueId cannot be zero");
    2605             :   return (left->NodeQueueId > right->NodeQueueId);
    2606       37285 : }
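                     : 
                     : // Editor's note: BURRSort is a strict tie-break cascade; each test only
                     : // runs when all earlier ones compared equal:
                     : //   1. physreg defs (keep them close to their uses),
                     : //   2. Sethi-Ullman priority, adjusted when call operands are involved,
                     : //   3. source order when one of the nodes is a call,
                     : //   4. distance to the closest successor (schedule def + use together),
                     : //   5. scratch estimate, i.e. the number of data predecessors,
                     : //   6. latency comparison, skipped when a call is involved (calls fall
                     : //      through to raw height/depth or straight to the queue id),
                     : //   7. NodeQueueId as the final, deterministic tie-breaker.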
    2607        9979 : 
    2608     3280350 : // Bottom up
    2609             : bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2610             :   if (int res = checkSpecialNodes(left, right))
    2611             :     return res > 0;
    2612             : 
    2613     3304955 :   return BURRSort(left, right, SPQ);
    2614       97969 : }
    2615             : 
    2616             : // Source order, otherwise bottom up.
    2617             : bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2618             :   if (int res = checkSpecialNodes(left, right))
    2619     3209234 :     return res > 0;
    2620     3103414 : 
    2621      192596 :   unsigned LOrder = SPQ->getNodeOrdering(left);
    2622             :   unsigned ROrder = SPQ->getNodeOrdering(right);
    2623     3103773 : 
    2624     3103773 :   // Prefer an ordering where the lower the non-zero order number, the higher
    2625     3103773 :   // the preference.
    2626             :   if ((LOrder || ROrder) && LOrder != ROrder)
    2627             :     return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
    2628             : 
    2629      672031 :   return BURRSort(left, right, SPQ);
    2630             : }
    2631     2566767 : 
    2632        2509 : // If the time between now and when the instruction will be ready can cover
    2633             : // the spill code, then avoid adding it to the ready queue. This gives long
    2634             : // stalls highest priority and allows hoisting across calls. It should also
    2635             : // speed up processing the available queue.
    2636             : bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2637    10097556 :   static const unsigned ReadyDelay = 3;
    2638             : 
    2639             :   if (SPQ->MayReduceRegPressure(SU)) return true;
    2640             : 
    2641             :   if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
    2642    10097556 : 
    2643    10097556 :   if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
    2644    10097556 :       != ScheduleHazardRecognizer::NoHazard)
    2645    10097556 :     return false;
    2646             : 
    2647             :   return true;
    2648             : }
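                     : 
                     : // Editor's note: with ReadyDelay = 3, an SU stays out of the available
                     : // queue while SU->getHeight() > CurCycle + 3, and the hazard recognizer
                     : // is probed three cycles early (the -ReadyDelay offset); a long-latency
                     : // node is therefore held back until its stall can be hidden, which both
                     : // gives long stalls priority and shrinks the queue that must be scanned.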
    2649             : 
    2650             : // Return true if right should be scheduled with higher priority than left.
    2651             : bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2652             :   if (int res = checkSpecialNodes(left, right))
    2653       45734 :     return res > 0;
    2654             : 
    2655             :   if (left->isCall || right->isCall)
    2656             :     // No way to compute latency of calls.
    2657             :     return BURRSort(left, right, SPQ);
    2658    10051822 : 
    2659    10051822 :   bool LHigh = SPQ->HighRegPressure(left);
    2660             :   bool RHigh = SPQ->HighRegPressure(right);
    2661             :   // Avoid causing spills. If register pressure is high, schedule for
    2662             :   // register pressure reduction.
    2663    10051822 :   if (LHigh && !RHigh) {
    2664         727 :     LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
    2665         727 :                       << right->NodeNum << ")\n");
    2666             :     return true;
    2667    10051822 :   }
    2668         800 :   else if (!LHigh && RHigh) {
    2669         800 :     LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
    2670             :                       << left->NodeNum << ")\n");
    2671             :     return false;
    2672    10051822 :   }
    2673     1805916 :   if (!LHigh && !RHigh) {
    2674             :     int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    2675             :     if (result != 0)
    2676             :       return result > 0;
    2677     8245906 :   }
    2678             :   return BURRSort(left, right, SPQ);
    2679             : }
    2680             : 
    2681             : // Schedule as many instructions in each cycle as possible. So don't make an
    2682             : // instruction available unless it is ready in the current cycle.
    2683        6078 : bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
    2684        2212 :   if (SU->getHeight() > CurCycle) return false;
    2685             : 
    2686             :   if (SPQ->getHazardRec()->getHazardType(SU, 0)
    2687             :       != ScheduleHazardRecognizer::NoHazard)
    2688             :     return false;
    2689             : 
    2690             :   return true;
    2691             : }
    2692             : 
    2693             : static bool canEnableCoalescing(SUnit *SU) {
    2694             :   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    2695             :   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    2696             :     // CopyToReg should be close to its uses to facilitate coalescing and
    2697             :     // avoid spilling.
    2698             :     return true;
    2699             : 
    2700             :   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
    2701             :       Opc == TargetOpcode::SUBREG_TO_REG ||
    2702             :       Opc == TargetOpcode::INSERT_SUBREG)
    2703             :     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    2704     8244326 :     // close to their uses to facilitate coalescing.
    2705     8244326 :     return true;
    2706     8244326 : 
    2707     4807380 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    2708             :     // If SU does not have a register def, schedule it close to its uses
    2709             :     // because it does not lengthen any live ranges.
    2710             :     return true;
    2711             : 
    2712     3436946 :   return false;
    2713      242936 : }
    2714             : 
    2715             : // list-ilp is currently an experimental scheduler that allows various
    2716             : // heuristics to be enabled prior to the normal register reduction logic.
    2717     3194010 : bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
    2718         401 :   if (int res = checkSpecialNodes(left, right))
    2719             :     return res > 0;
    2720             : 
    2721     3193609 :   if (left->isCall || right->isCall)
    2722     3193602 :     // No way to compute latency of calls.
    2723     3193580 :     return BURRSort(left, right, SPQ);
    2724     3193580 : 
    2725      636806 :   unsigned LLiveUses = 0, RLiveUses = 0;
    2726             :   int LPDiff = 0, RPDiff = 0;
    2727             :   if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    2728          29 :     LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    2729           0 :     RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
    2730             :   }
    2731          29 :   if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    2732           8 :     LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
    2733             :                       << "): " << LPDiff << " != SU(" << right->NodeNum
    2734             :                       << "): " << RPDiff << "\n");
    2735             :     return LPDiff > RPDiff;
    2736             :   }
    2737     2556795 : 
    2738             :   if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    2739             :     bool LReduce = canEnableCoalescing(left);
    2740             :     bool RReduce = canEnableCoalescing(right);
    2741           0 :     if (LReduce && !RReduce) return false;
    2742             :     if (RReduce && !LReduce) return true;
    2743         352 :   }
    2744             : 
    2745      108202 :   if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    2746             :     LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
    2747             :                       << " != SU(" << right->NodeNum << "): " << RLiveUses
    2748             :                       << "\n");
    2749           0 :     return LLiveUses < RLiveUses;
    2750             :   }
    2751           0 : 
    2752             :   if (!DisableSchedStalls) {
    2753           0 :     bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    2754             :     bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    2755             :     if (LStall != RStall)
    2756             :       return left->getHeight() > right->getHeight();
    2757             :   }
    2758           0 : 
    2759           0 :   if (!DisableSchedCriticalPath) {
    2760             :     int spread = (int)left->getDepth() - (int)right->getDepth();
    2761           0 :     if (std::abs(spread) > MaxReorderWindow) {
    2762             :       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
    2763             :                         << left->getDepth() << " != SU(" << right->NodeNum
    2764             :                         << "): " << right->getDepth() << "\n");
    2765             :       return left->getDepth() < right->getDepth();
    2766             :     }
    2767             :   }
    2768             : 
    2769             :   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    2770             :     int spread = (int)left->getHeight() - (int)right->getHeight();
    2771             :     if (std::abs(spread) > MaxReorderWindow)
    2772             :       return left->getHeight() > right->getHeight();
    2773             :   }
    2774             : 
    2775             :   return BURRSort(left, right, SPQ);
    2776             : }
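                     : 
                     : // Editor's note: like BURRSort, this operator() is a cascade, but with
                     : // the experimental ilp heuristics layered in front: register pressure
                     : // diff, coalescing-enabling nodes, live uses, stalls, critical path
                     : // (depth), and height, each gated by its own Disable* flag and, for
                     : // depth/height, by MaxReorderWindow, before deferring to BURRSort.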
    2777             : 
    2778             : void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
    2779             :   SUnits = &sunits;
    2780             :   // Add pseudo dependency edges for two-address nodes.
    2781             :   if (!Disable2AddrHack)
    2782             :     AddPseudoTwoAddrDeps();
    2783           0 :   // Reroute edges to nodes with multiple uses.
    2784             :   if (!TracksRegPressure && !SrcOrder)
    2785           0 :     PrescheduleNodesWithMultipleUses();
    2786             :   // Calculate node priorities.
    2787           0 :   CalculateSethiUllmanNumbers();
    2788             : 
    2789           0 :   // For single block loops, mark nodes that look like canonical IV increments.
    2790             :   if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
    2791           0 :     for (SUnit &SU : sunits)
    2792           0 :       initVRegCycle(&SU);
    2793             : }
    2794             : 
    2795           0 : //===----------------------------------------------------------------------===//
    2796             : //                    Preschedule for Register Pressure
    2797             : //===----------------------------------------------------------------------===//
    2798           0 : 
    2799             : bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
    2800           0 :   if (SU->isTwoAddress) {
    2801             :     unsigned Opc = SU->getNode()->getMachineOpcode();
    2802             :     const MCInstrDesc &MCID = TII->get(Opc);
    2803           0 :     unsigned NumRes = MCID.getNumDefs();
    2804             :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    2805           0 :     for (unsigned i = 0; i != NumOps; ++i) {
    2806           0 :       if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
    2807           0 :         SDNode *DU = SU->getNode()->getOperand(i).getNode();
    2808           0 :         if (DU->getNodeId() != -1 &&
    2809             :             Op->OrigNode == &(*SUnits)[DU->getNodeId()])
    2810           0 :           return true;
    2811             :       }
    2812             :     }
    2813             :   }
    2814             :   return false;
    2815             : }
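                     : 
                     : // Editor's note (hypothetical two-address example): for an x86 ADD32rr,
                     : // the MCInstrDesc ties the first source operand to def 0 via
                     : // MCOI::TIED_TO, so the loop above reports that SU may overwrite the
                     : // value produced by Op whenever Op's original node feeds that tied
                     : // operand; AddPseudoTwoAddrDeps (called from initNodes above) uses this
                     : // when deciding where to add pseudo two-address edges.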
    2816             : 
    2817             : /// canClobberReachingPhysRegUse - True if SU would clobber one of its
    2818             : /// successors' explicit physregs whose definition can reach DepSU.
    2819             : /// i.e. DepSU should not be scheduled above SU.
    2820             : static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
    2821             :                                          ScheduleDAGRRList *scheduleDAG,
    2822             :                                          const TargetInstrInfo *TII,
    2823             :                                          const TargetRegisterInfo *TRI) {
    2824             :   const MCPhysReg *ImpDefs
    2825       33420 :     = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
    2826       33420 :   const uint32_t *RegMask = getNodeRegMask(SU->getNode());
    2827       33420 :   if(!ImpDefs && !RegMask)
    2828             :     return false;
    2829             : 
    2830             :   for (const SDep &Succ : SU->Succs) {
    2831             :     SUnit *SuccSU = Succ.getSUnit();
    2832       52208 :     for (const SDep &SuccPred : SuccSU->Preds) {
    2833       26104 :       if (!SuccPred.isAssignedRegDep())
    2834             :         continue;
    2835             : 
    2836             :       if (RegMask &&
    2837             :           MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
    2838             :           scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2839       26104 :         return true;
    2840             : 
    2841             :       if (ImpDefs)
    2842           0 :         for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
    2843             :           // Return true if SU clobbers this physical register use and the
    2844             :           // definition of the register reaches from DepSU. IsReachable queries
    2845             :           // a topological forward sort of the DAG (following the successors).
    2846             :           if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
    2847             :               scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
    2848             :             return true;
    2849           0 :     }
    2850             :   }
    2851           0 :   return false;
    2852             : }
    2853           0 : 
    2854             : /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
    2855           0 : /// physical register defs.
    2856             : static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
    2857           0 :                                   const TargetInstrInfo *TII,
    2858             :                                   const TargetRegisterInfo *TRI) {
    2859           0 :   SDNode *N = SuccSU->getNode();
    2860           0 :   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    2861           0 :   const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
    2862             :   assert(ImpDefs && "Caller should check hasPhysRegDefs");
    2863           0 :   for (const SDNode *SUNode = SU->getNode(); SUNode;
    2864             :        SUNode = SUNode->getGluedNode()) {
    2865             :     if (!SUNode->isMachineOpcode())
    2866             :       continue;
    2867           0 :     const MCPhysReg *SUImpDefs =
    2868             :       TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    2869             :     const uint32_t *SURegMask = getNodeRegMask(SUNode);
    2870           0 :     if (!SUImpDefs && !SURegMask)
    2871           0 :       continue;
    2872           0 :     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    2873           0 :       MVT VT = N->getSimpleValueType(i);
    2874           0 :       if (VT == MVT::Glue || VT == MVT::Other)
    2875             :         continue;
    2876             :       if (!N->hasAnyUseOfValue(i))
    2877           0 :         continue;
    2878             :       unsigned Reg = ImpDefs[i - NumDefs];
    2879             :       if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
    2880             :         return true;
    2881           0 :       if (!SUImpDefs)
    2882             :         continue;
    2883             :       for (;*SUImpDefs; ++SUImpDefs) {
    2884           0 :         unsigned SUReg = *SUImpDefs;
    2885           0 :         if (TRI->regsOverlap(Reg, SUReg))
    2886           0 :           return true;
    2887           0 :       }
    2888           0 :     }
    2889             :   }
    2890             :   return false;
    2891           0 : }
    2892           0 : 
    2893           0 : /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
    2894             : /// are not handled well by the general register pressure reduction
    2895             : /// heuristics. When presented with code like this:
    2896             : ///
    2897           0 : ///      N
    2898             : ///    / |
    2899             : ///   /  |
    2900             : ///  U  store
    2901           0 : ///  |
    2902           0 : /// ...
    2903           0 : ///
    2904           0 : /// the heuristics tend to push the store up, but since the
    2905             : /// operand of the store has another use (U), this would increase
    2906             : /// the length of that other use (the U->N edge).
    2907           0 : ///
    2908             : /// This function transforms code like the above to route U's
    2909             : /// dependence through the store when possible, like this:
    2910     1269002 : ///
    2911     1269002 : ///      N
    2912             : ///      ||
    2913     1269002 : ///      ||
    2914           0 : ///     store
    2915             : ///       |
    2916     1269002 : ///       U
    2917        9613 : ///       |
    2918             : ///      ...
    2919     1269002 : ///
    2920             : /// This results in the store being scheduled immediately
    2921             : /// after N, which shortens the U->N live range, reducing
    2922     1269002 : /// register pressure.
    2923       88428 : void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
    2924       82971 :   // Visit all the nodes in topological order, working top-down.
    2925     1269001 :   for (SUnit &SU : *SUnits) {
    2926             :     // For now, only look at nodes with no data successors, such as stores.
    2927             :     // These are especially important, due to the heuristics in
    2928             :     // getNodePriority for nodes with no data successors.
    2929             :     if (SU.NumSuccs != 0)
    2930             :       continue;
    2931           0 :     // For now, only look at nodes with exactly one data predecessor.
    2932           0 :     if (SU.NumPreds != 1)
    2933           0 :       continue;
    2934           0 :     // Avoid prescheduling copies to virtual registers, which don't behave
    2935           0 :     // like other nodes from the perspective of scheduling heuristics.
    2936           0 :     if (SDNode *N = SU.getNode())
    2937           0 :       if (N->getOpcode() == ISD::CopyToReg &&
    2938           0 :           TargetRegisterInfo::isVirtualRegister
    2939           0 :             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2940           0 :         continue;
    2941           0 : 
    2942           0 :     // Locate the single data predecessor.
    2943             :     SUnit *PredSU = nullptr;
    2944             :     for (const SDep &Pred : SU.Preds)
    2945             :       if (!Pred.isCtrl()) {
    2946             :         PredSU = Pred.getSUnit();
    2947             :         break;
    2948             :       }
    2949             :     assert(PredSU);
    2950             : 
    2951             :     // Don't rewrite edges that carry physregs, because that requires additional
    2952           0 :     // support infrastructure.
    2953             :     if (PredSU->hasPhysRegDefs)
    2954             :       continue;
    2955             :     // Short-circuit the case where SU is PredSU's only data successor.
    2956             :     if (PredSU->NumSuccs == 1)
    2957           0 :       continue;
    2958             :     // Avoid prescheduling to copies from virtual registers, which don't behave
    2959           0 :     // like other nodes from the perspective of scheduling heuristics.
    2960             :     if (SDNode *N = SU.getNode())
    2961             :       if (N->getOpcode() == ISD::CopyFromReg &&
    2962           0 :           TargetRegisterInfo::isVirtualRegister
    2963             :             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
    2964           0 :         continue;
    2965             : 
    2966             :     // Perform checks on the successors of PredSU.
    2967             :     for (const SDep &PredSucc : PredSU->Succs) {
    2968           0 :       SUnit *PredSuccSU = PredSucc.getSUnit();
    2969           0 :       if (PredSuccSU == &SU) continue;
    2970             :       // If PredSU has another successor with no data successors, for
    2971             :       // now don't attempt to choose either over the other.
    2972             :       if (PredSuccSU->NumSuccs == 0)
    2973           0 :         goto outer_loop_continue;
    2974           0 :       // Don't break physical register dependencies.
    2975             :       if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
    2976             :         if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
    2977             :           goto outer_loop_continue;
    2978           0 :       // Don't introduce graph cycles.
    2979             :       if (scheduleDAG->IsReachable(&SU, PredSuccSU))
    2980             :         goto outer_loop_continue;
    2981             :     }
    2982             : 
    2983             :     // Ok, the transformation is safe and the heuristics suggest it is
    2984             :     // profitable. Update the graph.
    2985             :     LLVM_DEBUG(
    2986             :         dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
    2987             :                << PredSU->NodeNum
    2988           4 :                << " to guide scheduling in the presence of multiple uses\n");
    2989             :     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
    2990             :       SDep Edge = PredSU->Succs[i];
    2991           4 :       assert(!Edge.isAssignedRegDep());
    2992          12 :       SUnit *SuccSU = Edge.getSUnit();
    2993           4 :       if (SuccSU != &SU) {
    2994             :         Edge.setSUnit(PredSU);
    2995           8 :         scheduleDAG->RemovePred(SuccSU, Edge);
    2996             :         scheduleDAG->AddPred(&SU, Edge);
    2997           8 :         Edge.setSUnit(&SU);
    2998             :         scheduleDAG->AddPred(SuccSU, Edge);
    2999             :         --i;
    3000          16 :       }
    3001             :     }
    3002           8 :   outer_loop_continue:;
    3003             :   }
    3004          12 : }
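
To make the rewiring loop above easier to trace, here is a self-contained toy
model of the same move in plain C++ (ToyNode and rerouteThrough are invented
names, not the SUnit/SDep API): every edge that left PredSU for a node other
than SU is detached and re-attached so that it originates from SU, which is
what the RemovePred/AddPred pair accomplishes.

    #include <algorithm>
    #include <vector>

    // Toy model of the preschedule rewire: all of Pred's successors other
    // than SU are re-routed to depend on SU instead, so SU (the store) is
    // scheduled right next to Pred (N) and the other use's live range
    // shrinks.
    struct ToyNode { std::vector<ToyNode *> Succs; };

    void rerouteThrough(ToyNode &Pred, ToyNode &SU) {
      for (ToyNode *&Succ : Pred.Succs) {
        if (Succ == &SU)
          continue;               // keep the Pred -> SU edge itself
        SU.Succs.push_back(Succ); // Succ now depends on SU...
        Succ = nullptr;           // ...instead of on Pred.
      }
      Pred.Succs.erase(
          std::remove(Pred.Succs.begin(), Pred.Succs.end(), nullptr),
          Pred.Succs.end());
    }
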
    3005             : 
    3006           8 : /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
    3007             : /// it as a def&use operand, add a pseudo control edge from it to the other
    3008           8 : /// node (if it won't create a cycle) so the two-address one will be scheduled
    3009             : /// first (lower in the schedule). If both nodes are two-address, favor the
    3010           8 : /// one that has a CopyToReg use (more likely to be a loop induction update).
    3011             : /// If both are two-address but only one of them is commutable, favor the
    3012             : /// one that's not commutable.
    3013           4 : void RegReductionPQBase::AddPseudoTwoAddrDeps() {
    3014             :   for (SUnit &SU : *SUnits) {
    3015           8 :     if (!SU.isTwoAddress)
    3016           4 :       continue;
    3017           4 : 
    3018             :     SDNode *Node = SU.getNode();
    3019             :     if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
    3020             :       continue;
    3021             : 
    3022             :     bool isLiveOut = hasOnlyLiveOutUses(&SU);
    3023             :     unsigned Opc = Node->getMachineOpcode();
    3024             :     const MCInstrDesc &MCID = TII->get(Opc);
    3025             :     unsigned NumRes = MCID.getNumDefs();
    3026             :     unsigned NumOps = MCID.getNumOperands() - NumRes;
    3027             :     for (unsigned j = 0; j != NumOps; ++j) {
    3028             :       if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
    3029             :         continue;
    3030             :       SDNode *DU = SU.getNode()->getOperand(j).getNode();
    3031             :       if (DU->getNodeId() == -1)
    3032             :         continue;
    3033             :       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
    3034             :       if (!DUSU)
    3035             :         continue;
    3036             :       for (const SDep &Succ : DUSU->Succs) {
    3037             :         if (Succ.isCtrl())
    3038             :           continue;
    3039             :         SUnit *SuccSU = Succ.getSUnit();
    3040             :         if (SuccSU == &SU)
    3041             :           continue;
    3042             :         // Be conservative. Ignore if nodes aren't at roughly the same
    3043             :         // depth and height.
    3044             :         if (SuccSU->getHeight() < SU.getHeight() &&
    3045             :             (SU.getHeight() - SuccSU->getHeight()) > 1)
    3046             :           continue;
    3047             :         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
    3048             :         // constrains whatever is using the copy, instead of the copy
    3049             :         // itself. In the case that the copy is coalesced, this
    3050             : // preserves the intent of the pseudo two-address heuristics.
    3051             :         while (SuccSU->Succs.size() == 1 &&
    3052             :                SuccSU->getNode()->isMachineOpcode() &&
    3053             :                SuccSU->getNode()->getMachineOpcode() ==
    3054             :                  TargetOpcode::COPY_TO_REGCLASS)
    3055        9613 :           SuccSU = SuccSU->Succs.front().getSUnit();
    3056             :         // Don't constrain non-instruction nodes.
    3057       80032 :         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
    3058             :           continue;
    3059             :         // Don't constrain nodes with physical register defs if the
    3060             :         // predecessor can clobber them.
    3061       70419 :         if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
    3062             :           if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
    3063             :             continue;
    3064       28030 :         }
    3065             :         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
    3066             :         // these may be coalesced away. We want them close to their uses.
    3067             :         unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
    3068        9857 :         if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
    3069        9857 :             SuccOpc == TargetOpcode::INSERT_SUBREG ||
    3070             :             SuccOpc == TargetOpcode::SUBREG_TO_REG)
    3071        5930 :           continue;
    3072             :         if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
    3073             :             (!canClobber(SuccSU, DUSU) ||
    3074             :              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
    3075             :              (!SU.isCommutable && SuccSU->isCommutable)) &&
    3076        7900 :             !scheduleDAG->IsReachable(SuccSU, &SU)) {
    3077        7900 :           LLVM_DEBUG(dbgs()
    3078             :                      << "    Adding a pseudo-two-addr edge from SU #"
    3079        6955 :                      << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
    3080             :           scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
    3081             :         }
    3082             :       }
    3083             :     }
    3084             :   }
    3085        6955 : }
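
A concrete instance of the constraint this adds, with hypothetical
virtual-register names (illustrative, not taken from the code above):

    //   t0 = ...
    //   m  = SUB t0, y   // an ordinary reader of t0
    //   t1 = ADD t0, x   // two-address: after regalloc this becomes t0 += x
    //
    // The artificial edge added above makes the plain reader (SUB) a
    // predecessor of the two-address node (ADD), so the destructive def&use
    // is scheduled after every other reader of t0; t0 can then be updated in
    // place and no copy is needed to preserve its old value.
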
    3086             : 
    3087             : //===----------------------------------------------------------------------===//
    3088        6942 : //                         Public Constructor Functions
    3089             : //===----------------------------------------------------------------------===//
    3090             : 
    3091             : ScheduleDAGSDNodes *
    3092         679 : llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
    3093         679 :                                  CodeGenOpt::Level OptLevel) {
    3094             :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3095          14 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3096             :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3097             : 
    3098             :   BURegReductionPriorityQueue *PQ =
    3099        1188 :     new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
    3100             :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3101        1121 :   PQ->setScheduleDAG(SD);
    3102             :   return SD;
    3103             : }
    3104         706 : 
    3105             : ScheduleDAGSDNodes *
    3106             : llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
    3107         124 :                                    CodeGenOpt::Level OptLevel) {
    3108           4 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3109             :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3110             :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3111         120 : 
    3112             :   SrcRegReductionPriorityQueue *PQ =
    3113             :     new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
    3114             :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
    3115             :   PQ->setScheduleDAG(SD);
    3116             :   return SD;
    3117             : }
    3118             : 
    3119             : ScheduleDAGSDNodes *
    3120             : llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
    3121         213 :                                    CodeGenOpt::Level OptLevel) {
    3122         146 :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3123             :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3124             :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3125         146 :   const TargetLowering *TLI = IS->TLI;
    3126             : 
    3127          79 :   HybridBURRPriorityQueue *PQ =
    3128          79 :     new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3129             : 
    3130          79 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3131          79 :   PQ->setScheduleDAG(SD);
    3132             :   return SD;
    3133             : }
    3134       70419 : 
    3135             : ScheduleDAGSDNodes *
    3136        9613 : llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
    3137             :                                 CodeGenOpt::Level OptLevel) {
    3138             :   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
    3139             :   const TargetInstrInfo *TII = STI.getInstrInfo();
    3140             :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    3141             :   const TargetLowering *TLI = IS->TLI;
    3142             : 
    3143             :   ILPBURRPriorityQueue *PQ =
    3144             :     new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
    3145           0 :   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
    3146           0 :   PQ->setScheduleDAG(SD);
    3147           0 :   return SD;
    3148             : }
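
For context, these four factories are not called directly: they are
registered by name with the scheduler registry near the top of this file
(outside this view) and selected via llc's -pre-RA-sched option, e.g.
-pre-RA-sched=list-burr, with "source", "list-hybrid", and "list-ilp" naming
the other three. A sketch of the registration idiom (the description string
is illustrative):

    static RegisterScheduler
      burrListDAGScheduler("list-burr",
                           "Bottom-up register reduction list scheduling",
                           createBURRListDAGScheduler);
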

Generated by: LCOV version 1.13