LCOV - code coverage report
Current view: top level - lib/CodeGen - MachinePipeliner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1557 1635 95.2 %
Date: 2018-02-21 17:27:13 Functions: 95 96 99.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
      11             : //
      12             : // Software pipelining (SWP) is an instruction scheduling technique for loops
      13             : // that overlap loop iterations and explioits ILP via a compiler transformation.
      14             : //
      15             : // Swing Modulo Scheduling is an implementation of software pipelining
      16             : // that generates schedules that are near optimal in terms of initiation
      17             : // interval, register requirements, and stage count. See the papers:
      18             : //
      19             : // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
      20             : // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996
      21             : // Conference on Parallel Architectures and Compilation Techiniques.
      22             : //
      23             : // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
      24             : // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
      25             : // Transactions on Computers, Vol. 50, No. 3, 2001.
      26             : //
      27             : // "An Implementation of Swing Modulo Scheduling With Extensions for
      28             : // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
      29             : // Urbana-Chambpain, 2005.
      30             : //
      31             : //
      32             : // The SMS algorithm consists of three main steps after computing the minimal
      33             : // initiation interval (MII).
      34             : // 1) Analyze the dependence graph and compute information about each
      35             : //    instruction in the graph.
      36             : // 2) Order the nodes (instructions) by priority based upon the heuristics
      37             : //    described in the algorithm.
      38             : // 3) Attempt to schedule the nodes in the specified order using the MII.
      39             : //
      40             : // This SMS implementation is a target-independent back-end pass. When enabled,
      41             : // the pass runs just prior to the register allocation pass, while the machine
      42             : // IR is in SSA form. If software pipelining is successful, then the original
      43             : // loop is replaced by the optimized loop. The optimized loop contains one or
      44             : // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
      45             : // the instructions cannot be scheduled in a given MII, we increase the MII by
      46             : // one and try again.
      47             : //
      48             : // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
      49             : // represent loop carried dependences in the DAG as order edges to the Phi
      50             : // nodes. We also perform several passes over the DAG to eliminate unnecessary
      51             : // edges that inhibit the ability to pipeline. The implementation uses the
      52             : // DFAPacketizer class to compute the minimum initiation interval and the check
      53             : // where an instruction may be inserted in the pipelined schedule.
      54             : //
      55             : // In order for the SMS pass to work, several target specific hooks need to be
      56             : // implemented to get information about the loop structure and to rewrite
      57             : // instructions.
      58             : //
      59             : //===----------------------------------------------------------------------===//
      60             : 
      61             : #include "llvm/ADT/ArrayRef.h"
      62             : #include "llvm/ADT/BitVector.h"
      63             : #include "llvm/ADT/DenseMap.h"
      64             : #include "llvm/ADT/MapVector.h"
      65             : #include "llvm/ADT/PriorityQueue.h"
      66             : #include "llvm/ADT/SetVector.h"
      67             : #include "llvm/ADT/SmallPtrSet.h"
      68             : #include "llvm/ADT/SmallSet.h"
      69             : #include "llvm/ADT/SmallVector.h"
      70             : #include "llvm/ADT/Statistic.h"
      71             : #include "llvm/ADT/iterator_range.h"
      72             : #include "llvm/Analysis/AliasAnalysis.h"
      73             : #include "llvm/Analysis/MemoryLocation.h"
      74             : #include "llvm/Analysis/ValueTracking.h"
      75             : #include "llvm/CodeGen/DFAPacketizer.h"
      76             : #include "llvm/CodeGen/LiveIntervals.h"
      77             : #include "llvm/CodeGen/MachineBasicBlock.h"
      78             : #include "llvm/CodeGen/MachineDominators.h"
      79             : #include "llvm/CodeGen/MachineFunction.h"
      80             : #include "llvm/CodeGen/MachineFunctionPass.h"
      81             : #include "llvm/CodeGen/MachineInstr.h"
      82             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      83             : #include "llvm/CodeGen/MachineLoopInfo.h"
      84             : #include "llvm/CodeGen/MachineMemOperand.h"
      85             : #include "llvm/CodeGen/MachineOperand.h"
      86             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      87             : #include "llvm/CodeGen/RegisterClassInfo.h"
      88             : #include "llvm/CodeGen/RegisterPressure.h"
      89             : #include "llvm/CodeGen/ScheduleDAG.h"
      90             : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
      91             : #include "llvm/CodeGen/ScheduleDAGMutation.h"
      92             : #include "llvm/CodeGen/TargetInstrInfo.h"
      93             : #include "llvm/CodeGen/TargetOpcodes.h"
      94             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      95             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      96             : #include "llvm/IR/Attributes.h"
      97             : #include "llvm/IR/DebugLoc.h"
      98             : #include "llvm/IR/Function.h"
      99             : #include "llvm/MC/LaneBitmask.h"
     100             : #include "llvm/MC/MCInstrDesc.h"
     101             : #include "llvm/MC/MCInstrItineraries.h"
     102             : #include "llvm/MC/MCRegisterInfo.h"
     103             : #include "llvm/Pass.h"
     104             : #include "llvm/Support/CommandLine.h"
     105             : #include "llvm/Support/Compiler.h"
     106             : #include "llvm/Support/Debug.h"
     107             : #include "llvm/Support/MathExtras.h"
     108             : #include "llvm/Support/raw_ostream.h"
     109             : #include <algorithm>
     110             : #include <cassert>
     111             : #include <climits>
     112             : #include <cstdint>
     113             : #include <deque>
     114             : #include <functional>
     115             : #include <iterator>
     116             : #include <map>
     117             : #include <memory>
     118             : #include <tuple>
     119             : #include <utility>
     120             : #include <vector>
     121             : 
     122             : using namespace llvm;
     123             : 
     124             : #define DEBUG_TYPE "pipeliner"
     125             : 
     126             : STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
     127             : STATISTIC(NumPipelined, "Number of loops software pipelined");
     128             : 
     129             : /// A command line option to turn software pipelining on or off.
     130      291579 : static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
     131             :                                cl::ZeroOrMore,
     132      291579 :                                cl::desc("Enable Software Pipelining"));
     133             : 
     134             : /// A command line option to enable SWP at -Os.
     135       97193 : static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
     136       97193 :                                       cl::desc("Enable SWP at Os."), cl::Hidden,
     137      291579 :                                       cl::init(false));
     138             : 
     139             : /// A command line argument to limit minimum initial interval for pipelining.
     140       97193 : static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
     141       97193 :                               cl::desc("Size limit for the MII."),
     142      291579 :                               cl::Hidden, cl::init(27));
     143             : 
     144             : /// A command line argument to limit the number of stages in the pipeline.
     145             : static cl::opt<int>
     146       97193 :     SwpMaxStages("pipeliner-max-stages",
     147       97193 :                  cl::desc("Maximum stages allowed in the generated scheduled."),
     148      291579 :                  cl::Hidden, cl::init(3));
     149             : 
     150             : /// A command line option to disable the pruning of chain dependences due to
     151             : /// an unrelated Phi.
     152             : static cl::opt<bool>
     153       97193 :     SwpPruneDeps("pipeliner-prune-deps",
     154       97193 :                  cl::desc("Prune dependences between unrelated Phi nodes."),
     155      291579 :                  cl::Hidden, cl::init(true));
     156             : 
     157             : /// A command line option to disable the pruning of loop carried order
     158             : /// dependences.
     159             : static cl::opt<bool>
     160       97193 :     SwpPruneLoopCarried("pipeliner-prune-loop-carried",
     161       97193 :                         cl::desc("Prune loop carried order dependences."),
     162      291579 :                         cl::Hidden, cl::init(true));
     163             : 
     164             : #ifndef NDEBUG
     165             : static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
     166             : #endif
     167             : 
     168       97193 : static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
     169      194386 :                                      cl::ReallyHidden, cl::init(false),
     170      291579 :                                      cl::ZeroOrMore, cl::desc("Ignore RecMII"));
     171             : 
     172             : namespace {
     173             : 
     174             : class NodeSet;
     175             : class SMSchedule;
     176             : 
     177             : /// The main class in the implementation of the target independent
     178             : /// software pipeliner pass.
     179        1440 : class MachinePipeliner : public MachineFunctionPass {
     180             : public:
     181             :   MachineFunction *MF = nullptr;
     182             :   const MachineLoopInfo *MLI = nullptr;
     183             :   const MachineDominatorTree *MDT = nullptr;
     184             :   const InstrItineraryData *InstrItins;
     185             :   const TargetInstrInfo *TII = nullptr;
     186             :   RegisterClassInfo RegClassInfo;
     187             : 
     188             : #ifndef NDEBUG
     189             :   static int NumTries;
     190             : #endif
     191             : 
     192             :   /// Cache the target analysis information about the loop.
     193        1440 :   struct LoopInfo {
     194             :     MachineBasicBlock *TBB = nullptr;
     195             :     MachineBasicBlock *FBB = nullptr;
     196             :     SmallVector<MachineOperand, 4> BrCond;
     197             :     MachineInstr *LoopInductionVar = nullptr;
     198             :     MachineInstr *LoopCompare = nullptr;
     199             :   };
     200             :   LoopInfo LI;
     201             : 
     202             :   static char ID;
     203             : 
     204         480 :   MachinePipeliner() : MachineFunctionPass(ID) {
     205         480 :     initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
     206         480 :   }
     207             : 
     208             :   bool runOnMachineFunction(MachineFunction &MF) override;
     209             : 
     210         476 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     211             :     AU.addRequired<AAResultsWrapperPass>();
     212             :     AU.addPreserved<AAResultsWrapperPass>();
     213             :     AU.addRequired<MachineLoopInfo>();
     214             :     AU.addRequired<MachineDominatorTree>();
     215             :     AU.addRequired<LiveIntervals>();
     216         476 :     MachineFunctionPass::getAnalysisUsage(AU);
     217         476 :   }
     218             : 
     219             : private:
     220             :   bool canPipelineLoop(MachineLoop &L);
     221             :   bool scheduleLoop(MachineLoop &L);
     222             :   bool swingModuloScheduler(MachineLoop &L);
     223             : };
     224             : 
     225             : /// This class builds the dependence graph for the instructions in a loop,
     226             : /// and attempts to schedule the instructions using the SMS algorithm.
     227         342 : class SwingSchedulerDAG : public ScheduleDAGInstrs {
     228             :   MachinePipeliner &Pass;
     229             :   /// The minimum initiation interval between iterations for this schedule.
     230             :   unsigned MII = 0;
     231             :   /// Set to true if a valid pipelined schedule is found for the loop.
     232             :   bool Scheduled = false;
     233             :   MachineLoop &Loop;
     234             :   LiveIntervals &LIS;
     235             :   const RegisterClassInfo &RegClassInfo;
     236             : 
     237             :   /// A toplogical ordering of the SUnits, which is needed for changing
     238             :   /// dependences and iterating over the SUnits.
     239             :   ScheduleDAGTopologicalSort Topo;
     240             : 
     241             :   struct NodeInfo {
     242             :     int ASAP = 0;
     243             :     int ALAP = 0;
     244             : 
     245             :     NodeInfo() = default;
     246             :   };
     247             :   /// Computed properties for each node in the graph.
     248             :   std::vector<NodeInfo> ScheduleInfo;
     249             : 
     250             :   enum OrderKind { BottomUp = 0, TopDown = 1 };
     251             :   /// Computed node ordering for scheduling.
     252             :   SetVector<SUnit *> NodeOrder;
     253             : 
     254             :   using NodeSetType = SmallVector<NodeSet, 8>;
     255             :   using ValueMapTy = DenseMap<unsigned, unsigned>;
     256             :   using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
     257             :   using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
     258             : 
     259             :   /// Instructions to change when emitting the final schedule.
     260             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
     261             : 
     262             :   /// We may create a new instruction, so remember it because it
     263             :   /// must be deleted when the pass is finished.
     264             :   SmallPtrSet<MachineInstr *, 4> NewMIs;
     265             : 
     266             :   /// Ordered list of DAG postprocessing steps.
     267             :   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
     268             : 
     269             :   /// Helper class to implement Johnson's circuit finding algorithm.
     270         228 :   class Circuits {
     271             :     std::vector<SUnit> &SUnits;
     272             :     SetVector<SUnit *> Stack;
     273             :     BitVector Blocked;
     274             :     SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
     275             :     SmallVector<SmallVector<int, 4>, 16> AdjK;
     276             :     unsigned NumPaths;
     277             :     static unsigned MaxPaths;
     278             : 
     279             :   public:
     280         114 :     Circuits(std::vector<SUnit> &SUs)
     281         798 :         : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
     282             : 
     283             :     /// Reset the data structures used in the circuit algorithm.
     284         880 :     void reset() {
     285             :       Stack.clear();
     286             :       Blocked.reset();
     287        3520 :       B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
     288         880 :       NumPaths = 0;
     289         880 :     }
     290             : 
     291             :     void createAdjacencyStructure(SwingSchedulerDAG *DAG);
     292             :     bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
     293             :     void unblock(int U);
     294             :   };
     295             : 
     296             : public:
     297         114 :   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
     298             :                     const RegisterClassInfo &rci)
     299         228 :       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
     300         456 :         RegClassInfo(rci), Topo(SUnits, &ExitSU) {
     301         114 :     P.MF->getSubtarget().getSMSMutations(Mutations);
     302         114 :   }
     303             : 
     304             :   void schedule() override;
     305             :   void finishBlock() override;
     306             : 
     307             :   /// Return true if the loop kernel has been scheduled.
     308             :   bool hasNewSchedule() { return Scheduled; }
     309             : 
     310             :   /// Return the earliest time an instruction may be scheduled.
     311        5307 :   int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
     312             : 
     313             :   /// Return the latest time an instruction my be scheduled.
     314        6284 :   int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
     315             : 
     316             :   /// The mobility function, which the number of slots in which
     317             :   /// an instruction may be scheduled.
     318        4246 :   int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
     319             : 
     320             :   /// The depth, in the dependence graph, for a node.
     321         645 :   int getDepth(SUnit *Node) { return Node->getDepth(); }
     322             : 
     323             :   /// The height, in the dependence graph, for a node.
     324        3274 :   int getHeight(SUnit *Node) { return Node->getHeight(); }
     325             : 
     326             :   /// Return true if the dependence is a back-edge in the data dependence graph.
     327             :   /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
     328             :   /// using an anti dependence from a Phi to an instruction.
     329       26042 :   bool isBackedge(SUnit *Source, const SDep &Dep) {
     330       26042 :     if (Dep.getKind() != SDep::Anti)
     331             :       return false;
     332        5097 :     return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
     333             :   }
     334             : 
     335             :   /// Return true if the dependence is an order dependence between non-Phis.
     336        3157 :   static bool isOrder(SUnit *Source, const SDep &Dep) {
     337        3157 :     if (Dep.getKind() != SDep::Order)
     338             :       return false;
     339         919 :     return (!Source->getInstr()->isPHI() &&
     340         919 :             !Dep.getSUnit()->getInstr()->isPHI());
     341             :   }
     342             : 
     343             :   bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
     344             : 
     345             :   /// The latency of the dependence.
     346        4017 :   unsigned getLatency(SUnit *Source, const SDep &Dep) {
     347             :     // Anti dependences represent recurrences, so use the latency of the
     348             :     // instruction on the back-edge.
     349        4017 :     if (Dep.getKind() == SDep::Anti) {
     350         266 :       if (Source->getInstr()->isPHI())
     351         227 :         return Dep.getSUnit()->Latency;
     352          39 :       if (Dep.getSUnit()->getInstr()->isPHI())
     353          29 :         return Source->Latency;
     354          10 :       return Dep.getLatency();
     355             :     }
     356        3751 :     return Dep.getLatency();
     357             :   }
     358             : 
     359             :   /// The distance function, which indicates that operation V of iteration I
     360             :   /// depends on operations U of iteration I-distance.
     361             :   unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
     362             :     // Instructions that feed a Phi have a distance of 1. Computing larger
     363             :     // values for arrays requires data dependence information.
     364        2243 :     if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
     365             :       return 1;
     366             :     return 0;
     367             :   }
     368             : 
     369             :   /// Set the Minimum Initiation Interval for this schedule attempt.
     370             :   void setMII(unsigned mii) { MII = mii; }
     371             : 
     372             :   void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
     373             : 
     374             :   void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
     375             : 
     376             :   /// Return the new base register that was stored away for the changed
     377             :   /// instruction.
     378             :   unsigned getInstrBaseReg(SUnit *SU) {
     379             :     DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
     380         200 :         InstrChanges.find(SU);
     381         200 :     if (It != InstrChanges.end())
     382          18 :       return It->second.first;
     383             :     return 0;
     384             :   }
     385             : 
     386             :   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
     387             :     Mutations.push_back(std::move(Mutation));
     388             :   }
     389             : 
     390             : private:
     391             :   void addLoopCarriedDependences(AliasAnalysis *AA);
     392             :   void updatePhiDependences();
     393             :   void changeDependences();
     394             :   unsigned calculateResMII();
     395             :   unsigned calculateRecMII(NodeSetType &RecNodeSets);
     396             :   void findCircuits(NodeSetType &NodeSets);
     397             :   void fuseRecs(NodeSetType &NodeSets);
     398             :   void removeDuplicateNodes(NodeSetType &NodeSets);
     399             :   void computeNodeFunctions(NodeSetType &NodeSets);
     400             :   void registerPressureFilter(NodeSetType &NodeSets);
     401             :   void colocateNodeSets(NodeSetType &NodeSets);
     402             :   void checkNodeSets(NodeSetType &NodeSets);
     403             :   void groupRemainingNodes(NodeSetType &NodeSets);
     404             :   void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
     405             :                          SetVector<SUnit *> &NodesAdded);
     406             :   void computeNodeOrder(NodeSetType &NodeSets);
     407             :   bool schedulePipeline(SMSchedule &Schedule);
     408             :   void generatePipelinedLoop(SMSchedule &Schedule);
     409             :   void generateProlog(SMSchedule &Schedule, unsigned LastStage,
     410             :                       MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
     411             :                       MBBVectorTy &PrologBBs);
     412             :   void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
     413             :                       MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
     414             :                       MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
     415             :   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
     416             :                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
     417             :                             SMSchedule &Schedule, ValueMapTy *VRMap,
     418             :                             InstrMapTy &InstrMap, unsigned LastStageNum,
     419             :                             unsigned CurStageNum, bool IsLast);
     420             :   void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
     421             :                     MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
     422             :                     SMSchedule &Schedule, ValueMapTy *VRMap,
     423             :                     InstrMapTy &InstrMap, unsigned LastStageNum,
     424             :                     unsigned CurStageNum, bool IsLast);
     425             :   void removeDeadInstructions(MachineBasicBlock *KernelBB,
     426             :                               MBBVectorTy &EpilogBBs);
     427             :   void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
     428             :                       SMSchedule &Schedule);
     429             :   void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
     430             :                    MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
     431             :                    ValueMapTy *VRMap);
     432             :   bool computeDelta(MachineInstr &MI, unsigned &Delta);
     433             :   void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
     434             :                          unsigned Num);
     435             :   MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
     436             :                            unsigned InstStageNum);
     437             :   MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
     438             :                                     unsigned InstStageNum,
     439             :                                     SMSchedule &Schedule);
     440             :   void updateInstruction(MachineInstr *NewMI, bool LastDef,
     441             :                          unsigned CurStageNum, unsigned InstStageNum,
     442             :                          SMSchedule &Schedule, ValueMapTy *VRMap);
     443             :   MachineInstr *findDefInLoop(unsigned Reg);
     444             :   unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
     445             :                          unsigned LoopStage, ValueMapTy *VRMap,
     446             :                          MachineBasicBlock *BB);
     447             :   void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
     448             :                         SMSchedule &Schedule, ValueMapTy *VRMap,
     449             :                         InstrMapTy &InstrMap);
     450             :   void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
     451             :                              InstrMapTy &InstrMap, unsigned CurStageNum,
     452             :                              unsigned PhiNum, MachineInstr *Phi,
     453             :                              unsigned OldReg, unsigned NewReg,
     454             :                              unsigned PrevReg = 0);
     455             :   bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
     456             :                              unsigned &OffsetPos, unsigned &NewBase,
     457             :                              int64_t &NewOffset);
     458             :   void postprocessDAG();
     459             : };
     460             : 
     461             : /// A NodeSet contains a set of SUnit DAG nodes with additional information
     462             : /// that assigns a priority to the set.
     463        2107 : class NodeSet {
     464             :   SetVector<SUnit *> Nodes;
     465             :   bool HasRecurrence = false;
     466             :   unsigned RecMII = 0;
     467             :   int MaxMOV = 0;
     468             :   int MaxDepth = 0;
     469             :   unsigned Colocate = 0;
     470             :   SUnit *ExceedPressure = nullptr;
     471             : 
     472             : public:
     473             :   using iterator = SetVector<SUnit *>::const_iterator;
     474             : 
     475         114 :   NodeSet() = default;
     476         233 :   NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
     477             : 
     478         554 :   bool insert(SUnit *SU) { return Nodes.insert(SU); }
     479             : 
     480          29 :   void insert(iterator S, iterator E) { Nodes.insert(S, E); }
     481             : 
     482             :   template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
     483         209 :     return Nodes.remove_if(P);
     484             :   }
     485             : 
     486             :   unsigned count(SUnit *SU) const { return Nodes.count(SU); }
     487             : 
     488             :   bool hasRecurrence() { return HasRecurrence; };
     489             : 
     490         441 :   unsigned size() const { return Nodes.size(); }
     491             : 
     492             :   bool empty() const { return Nodes.empty(); }
     493             : 
     494         514 :   SUnit *getNode(unsigned i) const { return Nodes[i]; };
     495             : 
     496         238 :   void setRecMII(unsigned mii) { RecMII = mii; };
     497             : 
     498           7 :   void setColocate(unsigned c) { Colocate = c; };
     499             : 
     500           1 :   void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
     501             : 
     502             :   bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
     503             : 
     504          25 :   int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
     505             : 
     506             :   int getRecMII() { return RecMII; }
     507             : 
     508             :   /// Summarize node functions for the entire node set.
     509         208 :   void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
     510         697 :     for (SUnit *SU : *this) {
     511         978 :       MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
     512         978 :       MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
     513             :     }
     514         208 :   }
     515             : 
     516             :   void clear() {
     517             :     Nodes.clear();
     518         116 :     RecMII = 0;
     519         116 :     HasRecurrence = false;
     520         116 :     MaxMOV = 0;
     521         116 :     MaxDepth = 0;
     522         116 :     Colocate = 0;
     523         116 :     ExceedPressure = nullptr;
     524             :   }
     525             : 
     526         514 :   operator SetVector<SUnit *> &() { return Nodes; }
     527             : 
     528             :   /// Sort the node sets by importance. First, rank them by recurrence MII,
     529             :   /// then by mobility (least mobile done first), and finally by depth.
     530             :   /// Each node set may contain a colocate value which is used as the first
     531             :   /// tie breaker, if it's set.
     532             :   bool operator>(const NodeSet &RHS) const {
     533         198 :     if (RecMII == RHS.RecMII) {
     534         158 :       if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
     535           0 :         return Colocate < RHS.Colocate;
     536         158 :       if (MaxMOV == RHS.MaxMOV)
     537         109 :         return MaxDepth > RHS.MaxDepth;
     538          49 :       return MaxMOV < RHS.MaxMOV;
     539             :     }
     540          40 :     return RecMII > RHS.RecMII;
     541             :   }
     542             : 
     543             :   bool operator==(const NodeSet &RHS) const {
     544             :     return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
     545             :            MaxDepth == RHS.MaxDepth;
     546             :   }
     547             : 
     548             :   bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
     549             : 
     550             :   iterator begin() { return Nodes.begin(); }
     551             :   iterator end() { return Nodes.end(); }
     552             : 
     553             :   void print(raw_ostream &os) const {
     554             :     os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
     555             :        << " depth " << MaxDepth << " col " << Colocate << "\n";
     556             :     for (const auto &I : Nodes)
     557             :       os << "   SU(" << I->NodeNum << ") " << *(I->getInstr());
     558             :     os << "\n";
     559             :   }
     560             : 
     561             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     562             :   LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
     563             : #endif
     564             : };
     565             : 
     566             : /// This class repesents the scheduled code.  The main data structure is a
     567             : /// map from scheduled cycle to instructions.  During scheduling, the
     568             : /// data structure explicitly represents all stages/iterations.   When
     569             : /// the algorithm finshes, the schedule is collapsed into a single stage,
     570             : /// which represents instructions from different loop iterations.
     571             : ///
     572             : /// The SMS algorithm allows negative values for cycles, so the first cycle
     573             : /// in the schedule is the smallest cycle value.
     574         342 : class SMSchedule {
     575             : private:
     576             :   /// Map from execution cycle to instructions.
     577             :   DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
     578             : 
     579             :   /// Map from instruction to execution cycle.
     580             :   std::map<SUnit *, int> InstrToCycle;
     581             : 
     582             :   /// Map for each register and the max difference between its uses and def.
     583             :   /// The first element in the pair is the max difference in stages. The
     584             :   /// second is true if the register defines a Phi value and loop value is
     585             :   /// scheduled before the Phi.
     586             :   std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
     587             : 
     588             :   /// Keep track of the first cycle value in the schedule.  It starts
     589             :   /// as zero, but the algorithm allows negative values.
     590             :   int FirstCycle = 0;
     591             : 
     592             :   /// Keep track of the last cycle value in the schedule.
     593             :   int LastCycle = 0;
     594             : 
     595             :   /// The initiation interval (II) for the schedule.
     596             :   int InitiationInterval = 0;
     597             : 
     598             :   /// Target machine information.
     599             :   const TargetSubtargetInfo &ST;
     600             : 
     601             :   /// Virtual register information.
     602             :   MachineRegisterInfo &MRI;
     603             : 
     604             :   std::unique_ptr<DFAPacketizer> Resources;
     605             : 
     606             : public:
     607         114 :   SMSchedule(MachineFunction *mf)
     608         342 :       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
     609         570 :         Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
     610             : 
     611         161 :   void reset() {
     612         161 :     ScheduledInstrs.clear();
     613             :     InstrToCycle.clear();
     614             :     RegToStageDiff.clear();
     615         161 :     FirstCycle = 0;
     616         161 :     LastCycle = 0;
     617         161 :     InitiationInterval = 0;
     618         161 :   }
     619             : 
     620             :   /// Set the initiation interval for this schedule.
     621         157 :   void setInitiationInterval(int ii) { InitiationInterval = ii; }
     622             : 
     623             :   /// Return the first cycle in the completed schedule.  This
     624             :   /// can be a negative value.
     625             :   int getFirstCycle() const { return FirstCycle; }
     626             : 
     627             :   /// Return the last cycle in the finalized schedule.
     628         618 :   int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
     629             : 
     630             :   /// Return the cycle of the earliest scheduled instruction in the dependence
     631             :   /// chain.
     632             :   int earliestCycleInChain(const SDep &Dep);
     633             : 
     634             :   /// Return the cycle of the latest scheduled instruction in the dependence
     635             :   /// chain.
     636             :   int latestCycleInChain(const SDep &Dep);
     637             : 
     638             :   void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
     639             :                     int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
     640             :   bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
     641             : 
     642             :   /// Iterators for the cycle to instruction map.
     643             :   using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
     644             :   using const_sched_iterator =
     645             :       DenseMap<int, std::deque<SUnit *>>::const_iterator;
     646             : 
     647             :   /// Return true if the instruction is scheduled at the specified stage.
     648             :   bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
     649        1666 :     return (stageScheduled(SU) == (int)StageNum);
     650             :   }
     651             : 
     652             :   /// Return the stage for a scheduled instruction.  Return -1 if
     653             :   /// the instruction has not been scheduled.
     654       12733 :   int stageScheduled(SUnit *SU) const {
     655             :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
     656       12733 :     if (it == InstrToCycle.end())
     657             :       return -1;
     658       12095 :     return (it->second - FirstCycle) / InitiationInterval;
     659             :   }
     660             : 
     661             :   /// Return the cycle for a scheduled instruction. This function normalizes
     662             :   /// the first cycle to be 0.
     663             :   unsigned cycleScheduled(SUnit *SU) const {
     664             :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
     665             :     assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
     666        3211 :     return (it->second - FirstCycle) % InitiationInterval;
     667             :   }
     668             : 
     669             :   /// Return the maximum stage count needed for this schedule.
     670             :   unsigned getMaxStageCount() {
     671        4082 :     return (LastCycle - FirstCycle) / InitiationInterval;
     672             :   }
     673             : 
     674             :   /// Return the max. number of stages/iterations that can occur between a
     675             :   /// register definition and its uses.
     676        1192 :   unsigned getStagesForReg(int Reg, unsigned CurStage) {
     677        1192 :     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
     678        2384 :     if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
     679             :       return 1;
     680        1190 :     return Stages.first;
     681             :   }
     682             : 
     683             :   /// The number of stages for a Phi is a little different than other
     684             :   /// instructions. The minimum value computed in RegToStageDiff is 1
     685             :   /// because we assume the Phi is needed for at least 1 iteration.
     686             :   /// This is not the case if the loop value is scheduled prior to the
     687             :   /// Phi in the same stage.  This function returns the number of stages
     688             :   /// or iterations needed between the Phi definition and any uses.
     689             :   unsigned getStagesForPhi(int Reg) {
     690         195 :     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
     691         195 :     if (Stages.second)
     692             :       return Stages.first;
     693         193 :     return Stages.first - 1;
     694             :   }
     695             : 
     696             :   /// Return the instructions that are scheduled at the specified cycle.
     697             :   std::deque<SUnit *> &getInstructions(int cycle) {
     698        6398 :     return ScheduledInstrs[cycle];
     699             :   }
     700             : 
     701             :   bool isValidSchedule(SwingSchedulerDAG *SSD);
     702             :   void finalizeSchedule(SwingSchedulerDAG *SSD);
     703             :   bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
     704             :                        std::deque<SUnit *> &Insts);
     705             :   bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
     706             :   bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
     707             :                              MachineOperand &MO);
     708             :   void print(raw_ostream &os) const;
     709             :   void dump() const;
     710             : };
     711             : 
     712             : } // end anonymous namespace
     713             : 
     714             : unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
     715             : char MachinePipeliner::ID = 0;
     716             : #ifndef NDEBUG
     717             : int MachinePipeliner::NumTries = 0;
     718             : #endif
     719             : char &llvm::MachinePipelinerID = MachinePipeliner::ID;
     720             : 
     721       24375 : INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
     722             :                       "Modulo Software Pipelining", false, false)
     723       24375 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     724       24375 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     725       24375 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     726       24375 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     727      135674 : INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
     728             :                     "Modulo Software Pipelining", false, false)
     729             : 
     730             : /// The "main" function for implementing Swing Modulo Scheduling.
     731        1838 : bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
     732        1838 :   if (skipFunction(mf.getFunction()))
     733             :     return false;
     734             : 
     735        1828 :   if (!EnableSWP)
     736             :     return false;
     737             : 
     738        1835 :   if (mf.getFunction().getAttributes().hasAttribute(
     739        1835 :           AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
     740          14 :       !EnableSWPOptSize.getPosition())
     741             :     return false;
     742             : 
     743        1807 :   MF = &mf;
     744        1807 :   MLI = &getAnalysis<MachineLoopInfo>();
     745        1807 :   MDT = &getAnalysis<MachineDominatorTree>();
     746        1807 :   TII = MF->getSubtarget().getInstrInfo();
     747        1807 :   RegClassInfo.runOnMachineFunction(*MF);
     748             : 
     749        3798 :   for (auto &L : *MLI)
     750         184 :     scheduleLoop(*L);
     751             : 
     752             :   return false;
     753             : }
     754             : 
     755             : /// Attempt to perform the SMS algorithm on the specified loop. This function is
     756             : /// the main entry point for the algorithm.  The function identifies candidate
     757             : /// loops, calculates the minimum initiation interval, and attempts to schedule
     758             : /// the loop.
     759         197 : bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
     760             :   bool Changed = false;
     761         210 :   for (auto &InnerLoop : L)
     762          13 :     Changed |= scheduleLoop(*InnerLoop);
     763             : 
     764             : #ifndef NDEBUG
     765             :   // Stop trying after reaching the limit (if any).
     766             :   int Limit = SwpLoopLimit;
     767             :   if (Limit >= 0) {
     768             :     if (NumTries >= SwpLoopLimit)
     769             :       return Changed;
     770             :     NumTries++;
     771             :   }
     772             : #endif
     773             : 
     774         197 :   if (!canPipelineLoop(L))
     775             :     return Changed;
     776             : 
     777             :   ++NumTrytoPipeline;
     778             : 
     779         114 :   Changed = swingModuloScheduler(L);
     780             : 
     781         114 :   return Changed;
     782             : }
     783             : 
     784             : /// Return true if the loop can be software pipelined.  The algorithm is
     785             : /// restricted to loops with a single basic block.  Make sure that the
     786             : /// branch in the loop can be analyzed.
     787         197 : bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
     788         197 :   if (L.getNumBlocks() != 1)
     789             :     return false;
     790             : 
     791             :   // Check if the branch can't be understood because we can't do pipelining
     792             :   // if that's the case.
     793         168 :   LI.TBB = nullptr;
     794         168 :   LI.FBB = nullptr;
     795         168 :   LI.BrCond.clear();
     796         336 :   if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
     797             :     return false;
     798             : 
     799         168 :   LI.LoopInductionVar = nullptr;
     800         168 :   LI.LoopCompare = nullptr;
     801         168 :   if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
     802             :     return false;
     803             : 
     804         115 :   if (!L.getLoopPreheader())
     805             :     return false;
     806             : 
     807             :   // If any of the Phis contain subregs, then we can't pipeline
     808             :   // because we don't know how to maintain subreg information in the
     809             :   // VMap structure.
     810             :   MachineBasicBlock *MBB = L.getHeader();
     811         431 :   for (auto &PHI : MBB->phis())
     812        1006 :     for (unsigned i = 1; i != PHI.getNumOperands(); i += 2)
     813         806 :       if (PHI.getOperand(i).getSubReg() != 0)
     814           1 :         return false;
     815             : 
     816         114 :   return true;
     817             : }
     818             : 
     819             : /// The SMS algorithm consists of the following main steps:
     820             : /// 1. Computation and analysis of the dependence graph.
     821             : /// 2. Ordering of the nodes (instructions).
     822             : /// 3. Attempt to Schedule the loop.
     823         114 : bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
     824             :   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
     825             : 
     826         228 :   SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
     827             : 
     828             :   MachineBasicBlock *MBB = L.getHeader();
     829             :   // The kernel should not include any terminator instructions.  These
     830             :   // will be added back later.
     831         114 :   SMS.startBlock(MBB);
     832             : 
     833             :   // Compute the number of 'real' instructions in the basic block by
     834             :   // ignoring terminators.
     835             :   unsigned size = MBB->size();
     836         342 :   for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
     837             :                                    E = MBB->instr_end();
     838         342 :        I != E; ++I, --size)
     839             :     ;
     840             : 
     841         228 :   SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
     842         114 :   SMS.schedule();
     843         114 :   SMS.exitRegion();
     844             : 
     845         114 :   SMS.finishBlock();
     846         228 :   return SMS.hasNewSchedule();
     847             : }
     848             : 
     849             : /// We override the schedule function in ScheduleDAGInstrs to implement the
     850             : /// scheduling part of the Swing Modulo Scheduling algorithm.
     851         114 : void SwingSchedulerDAG::schedule() {
     852         114 :   AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
     853         114 :   buildSchedGraph(AA);
     854         114 :   addLoopCarriedDependences(AA);
     855         114 :   updatePhiDependences();
     856         114 :   Topo.InitDAGTopologicalSorting();
     857             :   postprocessDAG();
     858         114 :   changeDependences();
     859             :   DEBUG({
     860             :     for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
     861             :       SUnits[su].dumpAll(this);
     862             :   });
     863             : 
     864          87 :   NodeSetType NodeSets;
     865         114 :   findCircuits(NodeSets);
     866             : 
     867             :   // Calculate the MII.
     868         114 :   unsigned ResMII = calculateResMII();
     869         114 :   unsigned RecMII = calculateRecMII(NodeSets);
     870             : 
     871         114 :   fuseRecs(NodeSets);
     872             : 
     873             :   // This flag is used for testing and can cause correctness problems.
     874         114 :   if (SwpIgnoreRecMII)
     875           0 :     RecMII = 0;
     876             : 
     877         114 :   MII = std::max(ResMII, RecMII);
     878             :   DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
     879             :                << ")\n");
     880             : 
     881             :   // Can't schedule a loop without a valid MII.
     882         114 :   if (MII == 0)
     883          27 :     return;
     884             : 
     885             :   // Don't pipeline large loops.
     886         114 :   if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
     887             :     return;
     888             : 
     889         114 :   computeNodeFunctions(NodeSets);
     890             : 
     891         114 :   registerPressureFilter(NodeSets);
     892             : 
     893         114 :   colocateNodeSets(NodeSets);
     894             : 
     895         114 :   checkNodeSets(NodeSets);
     896             : 
     897             :   DEBUG({
     898             :     for (auto &I : NodeSets) {
     899             :       dbgs() << "  Rec NodeSet ";
     900             :       I.dump();
     901             :     }
     902             :   });
     903             : 
     904             :   std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
     905             : 
     906         114 :   groupRemainingNodes(NodeSets);
     907             : 
     908         114 :   removeDuplicateNodes(NodeSets);
     909             : 
     910             :   DEBUG({
     911             :     for (auto &I : NodeSets) {
     912             :       dbgs() << "  NodeSet ";
     913             :       I.dump();
     914             :     }
     915             :   });
     916             : 
     917         114 :   computeNodeOrder(NodeSets);
     918             : 
     919         201 :   SMSchedule Schedule(Pass.MF);
     920         114 :   Scheduled = schedulePipeline(Schedule);
     921             : 
     922         114 :   if (!Scheduled)
     923          27 :     return;
     924             : 
     925          87 :   unsigned numStages = Schedule.getMaxStageCount();
     926             :   // No need to generate pipeline if there are no overlapped iterations.
     927          87 :   if (numStages == 0)
     928             :     return;
     929             : 
     930             :   // Check that the maximum stage count is less than user-defined limit.
     931          87 :   if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
     932             :     return;
     933             : 
     934          87 :   generatePipelinedLoop(Schedule);
     935             :   ++NumPipelined;
     936             : }
     937             : 
     938             : /// Clean up after the software pipeliner runs.
     939         114 : void SwingSchedulerDAG::finishBlock() {
     940         114 :   for (MachineInstr *I : NewMIs)
     941           9 :     MF.DeleteMachineInstr(I);
     942         114 :   NewMIs.clear();
     943             : 
     944             :   // Call the superclass.
     945         114 :   ScheduleDAGInstrs::finishBlock();
     946         114 : }
     947             : 
     948             : /// Return the register values for  the operands of a Phi instruction.
     949             : /// This function assume the instruction is a Phi.
     950             : static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
     951             :                        unsigned &InitVal, unsigned &LoopVal) {
     952             :   assert(Phi.isPHI() && "Expecting a Phi.");
     953             : 
     954             :   InitVal = 0;
     955             :   LoopVal = 0;
     956        4077 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     957        5436 :     if (Phi.getOperand(i + 1).getMBB() != Loop)
     958         506 :       InitVal = Phi.getOperand(i).getReg();
     959             :     else
     960        1359 :       LoopVal = Phi.getOperand(i).getReg();
     961             : 
     962             :   assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
     963             : }
     964             : 
     965             : /// Return the Phi register value that comes from the incoming block.
     966             : static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
     967         143 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     968         286 :     if (Phi.getOperand(i + 1).getMBB() != LoopBB)
     969         143 :       return Phi.getOperand(i).getReg();
     970             :   return 0;
     971             : }
     972             : 
     973             : /// Return the Phi register value that comes the loop block.
     974             : static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
     975        1395 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     976        2584 :     if (Phi.getOperand(i + 1).getMBB() == LoopBB)
     977         620 :       return Phi.getOperand(i).getReg();
     978             :   return 0;
     979             : }
     980             : 
     981             : /// Return true if SUb can be reached from SUa following the chain edges.
     982          50 : static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
     983             :   SmallPtrSet<SUnit *, 8> Visited;
     984             :   SmallVector<SUnit *, 8> Worklist;
     985          50 :   Worklist.push_back(SUa);
     986         126 :   while (!Worklist.empty()) {
     987             :     const SUnit *SU = Worklist.pop_back_val();
     988         300 :     for (auto &SI : SU->Succs) {
     989         124 :       SUnit *SuccSU = SI.getSUnit();
     990         124 :       if (SI.getKind() == SDep::Order) {
     991          74 :         if (Visited.count(SuccSU))
     992           0 :           continue;
     993          74 :         if (SuccSU == SUb)
     994          24 :           return true;
     995          50 :         Worklist.push_back(SuccSU);
     996          50 :         Visited.insert(SuccSU);
     997             :       }
     998             :     }
     999             :   }
    1000             :   return false;
    1001             : }
    1002             : 
    1003             : /// Return true if the instruction causes a chain between memory
    1004             : /// references before and after it.
    1005         880 : static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
    1006        1758 :   return MI.isCall() || MI.hasUnmodeledSideEffects() ||
    1007         886 :          (MI.hasOrderedMemoryRef() &&
    1008          16 :           (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
    1009             : }
    1010             : 
    1011             : /// Return the underlying objects for the memory references of an instruction.
    1012             : /// This function calls the code in ValueTracking, but first checks that the
    1013             : /// instruction has a memory operand.
    1014         257 : static void getUnderlyingObjects(MachineInstr *MI,
    1015             :                                  SmallVectorImpl<Value *> &Objs,
    1016             :                                  const DataLayout &DL) {
    1017         257 :   if (!MI->hasOneMemOperand())
    1018             :     return;
    1019         217 :   MachineMemOperand *MM = *MI->memoperands_begin();
    1020         209 :   if (!MM->getValue())
    1021             :     return;
    1022         209 :   GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
    1023             : }
    1024             : 
    1025             : /// Add a chain edge between a load and store if the store can be an
    1026             : /// alias of the load on a subsequent iteration, i.e., a loop carried
    1027             : /// dependence. This code is very similar to the code in ScheduleDAGInstrs
    1028             : /// but that code doesn't create loop carried dependences.
    1029         114 : void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
    1030             :   MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
    1031         994 :   for (auto &SU : SUnits) {
    1032         880 :     MachineInstr &MI = *SU.getInstr();
    1033         880 :     if (isDependenceBarrier(MI, AA))
    1034             :       PendingLoads.clear();
    1035         870 :     else if (MI.mayLoad()) {
    1036             :       SmallVector<Value *, 4> Objs;
    1037         168 :       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
    1038         424 :       for (auto V : Objs) {
    1039         128 :         SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
    1040         128 :         SUs.push_back(&SU);
    1041             :       }
    1042         702 :     } else if (MI.mayStore()) {
    1043             :       SmallVector<Value *, 4> Objs;
    1044          89 :       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
    1045         251 :       for (auto V : Objs) {
    1046             :         MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
    1047          81 :             PendingLoads.find(V);
    1048          81 :         if (I == PendingLoads.end())
    1049             :           continue;
    1050         124 :         for (auto Load : I->second) {
    1051          50 :           if (isSuccOrder(Load, &SU))
    1052          61 :             continue;
    1053          26 :           MachineInstr &LdMI = *Load->getInstr();
    1054             :           // First, perform the cheaper check that compares the base register.
    1055             :           // If they are the same and the load offset is less than the store
    1056             :           // offset, then mark the dependence as loop carried potentially.
    1057             :           unsigned BaseReg1, BaseReg2;
    1058             :           int64_t Offset1, Offset2;
    1059          52 :           if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
    1060          26 :               !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
    1061           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1062           0 :             continue;            
    1063             :           }
    1064          39 :           if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
    1065             :             assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
    1066             :                    "What happened to the chain edge?");
    1067          13 :             SU.addPred(SDep(Load, SDep::Barrier));
    1068          13 :             continue;
    1069             :           }
    1070             :           // Second, the more expensive check that uses alias analysis on the
    1071             :           // base registers. If they alias, and the load offset is less than
    1072             :           // the store offset, the mark the dependence as loop carried.
    1073          13 :           if (!AA) {
    1074           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1075           0 :             continue;
    1076             :           }
    1077          13 :           MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
    1078          13 :           MachineMemOperand *MMO2 = *MI.memoperands_begin();
    1079          26 :           if (!MMO1->getValue() || !MMO2->getValue()) {
    1080           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1081           0 :             continue;
    1082             :           }
    1083          26 :           if (MMO1->getValue() == MMO2->getValue() &&
    1084          13 :               MMO1->getOffset() <= MMO2->getOffset()) {
    1085           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1086           0 :             continue;
    1087             :           }
    1088             :           AliasResult AAResult = AA->alias(
    1089          13 :               MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
    1090             :                              MMO1->getAAInfo()),
    1091          13 :               MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
    1092          13 :                              MMO2->getAAInfo()));
    1093             : 
    1094          13 :           if (AAResult != NoAlias)
    1095          13 :             SU.addPred(SDep(Load, SDep::Barrier));
    1096             :         }
    1097             :       }
    1098             :     }
    1099             :   }
    1100         114 : }
    1101             : 
    1102             : /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
    1103             : /// processes dependences for PHIs. This function adds true dependences
    1104             : /// from a PHI to a use, and a loop carried dependence from the use to the
    1105             : /// PHI. The loop carried dependence is represented as an anti dependence
    1106             : /// edge. This function also removes chain dependences between unrelated
    1107             : /// PHIs.
    1108         114 : void SwingSchedulerDAG::updatePhiDependences() {
    1109             :   SmallVector<SDep, 4> RemoveDeps;
    1110         114 :   const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
    1111             : 
    1112             :   // Iterate over each DAG node.
    1113         994 :   for (SUnit &I : SUnits) {
    1114             :     RemoveDeps.clear();
    1115             :     // Set to true if the instruction has an operand defined by a Phi.
    1116             :     unsigned HasPhiUse = 0;
    1117             :     unsigned HasPhiDef = 0;
    1118         880 :     MachineInstr *MI = I.getInstr();
    1119             :     // Iterate over each operand, and we process the definitions.
    1120        4159 :     for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
    1121         880 :                                     MOE = MI->operands_end();
    1122        4159 :          MOI != MOE; ++MOI) {
    1123        3279 :       if (!MOI->isReg())
    1124         883 :         continue;
    1125        2396 :       unsigned Reg = MOI->getReg();
    1126        2396 :       if (MOI->isDef()) {
    1127             :         // If the register is used by a Phi, then create an anti dependence.
    1128        1148 :         for (MachineRegisterInfo::use_instr_iterator
    1129         828 :                  UI = MRI.use_instr_begin(Reg),
    1130             :                  UE = MRI.use_instr_end();
    1131        1976 :              UI != UE; ++UI) {
    1132             :           MachineInstr *UseMI = &*UI;
    1133             :           SUnit *SU = getSUnit(UseMI);
    1134        1116 :           if (SU != nullptr && UseMI->isPHI()) {
    1135             :             if (!MI->isPHI()) {
    1136             :               SDep Dep(SU, SDep::Anti, Reg);
    1137         191 :               I.addPred(Dep);
    1138             :             } else {
    1139             :               HasPhiDef = Reg;
    1140             :               // Add a chain edge to a dependent Phi that isn't an existing
    1141             :               // predecessor.
    1142           7 :               if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
    1143           1 :                 I.addPred(SDep(SU, SDep::Barrier));
    1144             :             }
    1145             :           }
    1146             :         }
    1147             :       } else if (MOI->isUse()) {
    1148             :         // If the register is defined by a Phi, then create a true dependence.
    1149        1568 :         MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
    1150        1568 :         if (DefMI == nullptr)
    1151          11 :           continue;
    1152             :         SUnit *SU = getSUnit(DefMI);
    1153        1123 :         if (SU != nullptr && DefMI->isPHI()) {
    1154             :           if (!MI->isPHI()) {
    1155             :             SDep Dep(SU, SDep::Data, Reg);
    1156             :             Dep.setLatency(0);
    1157         394 :             ST.adjustSchedDependency(SU, &I, Dep);
    1158         394 :             I.addPred(Dep);
    1159             :           } else {
    1160             :             HasPhiUse = Reg;
    1161             :             // Add a chain edge to a dependent Phi that isn't an existing
    1162             :             // predecessor.
    1163          11 :             if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
    1164           0 :               I.addPred(SDep(SU, SDep::Barrier));
    1165             :           }
    1166             :         }
    1167             :       }
    1168             :     }
    1169             :     // Remove order dependences from an unrelated Phi.
    1170         880 :     if (!SwpPruneDeps)
    1171           0 :       continue;
    1172        3340 :     for (auto &PI : I.Preds) {
    1173        1230 :       MachineInstr *PMI = PI.getSUnit()->getInstr();
    1174         590 :       if (PMI->isPHI() && PI.getKind() == SDep::Order) {
    1175           1 :         if (I.getInstr()->isPHI()) {
    1176           1 :           if (PMI->getOperand(0).getReg() == HasPhiUse)
    1177           0 :             continue;
    1178           2 :           if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
    1179           1 :             continue;
    1180             :         }
    1181           0 :         RemoveDeps.push_back(PI);
    1182             :       }
    1183             :     }
    1184         880 :     for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
    1185           0 :       I.removePred(RemoveDeps[i]);
    1186             :   }
    1187         114 : }
    1188             : 
    1189             : /// Iterate over each DAG node and see if we can change any dependences
    1190             : /// in order to reduce the recurrence MII.
    1191         114 : void SwingSchedulerDAG::changeDependences() {
    1192             :   // See if an instruction can use a value from the previous iteration.
    1193             :   // If so, we update the base and offset of the instruction and change
    1194             :   // the dependences.
    1195         994 :   for (SUnit &I : SUnits) {
    1196         880 :     unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
    1197         880 :     int64_t NewOffset = 0;
    1198         880 :     if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
    1199             :                                NewOffset))
    1200        1739 :       continue;
    1201             : 
    1202             :     // Get the MI and SUnit for the instruction that defines the original base.
    1203          22 :     unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
    1204          11 :     MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
    1205          11 :     if (!DefMI)
    1206           0 :       continue;
    1207             :     SUnit *DefSU = getSUnit(DefMI);
    1208          11 :     if (!DefSU)
    1209           0 :       continue;
    1210             :     // Get the MI and SUnit for the instruction that defins the new base.
    1211          11 :     MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
    1212          11 :     if (!LastMI)
    1213           0 :       continue;
    1214             :     SUnit *LastSU = getSUnit(LastMI);
    1215          11 :     if (!LastSU)
    1216           0 :       continue;
    1217             : 
    1218          11 :     if (Topo.IsReachable(&I, LastSU))
    1219           1 :       continue;
    1220             : 
    1221             :     // Remove the dependence. The value now depends on a prior iteration.
    1222             :     SmallVector<SDep, 4> Deps;
    1223          30 :     for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
    1224             :          ++P)
    1225          10 :       if (P->getSUnit() == DefSU)
    1226          10 :         Deps.push_back(*P);
    1227          20 :     for (int i = 0, e = Deps.size(); i != e; i++) {
    1228          20 :       Topo.RemovePred(&I, Deps[i].getSUnit());
    1229          10 :       I.removePred(Deps[i]);
    1230             :     }
    1231             :     // Remove the chain dependence between the instructions.
    1232             :     Deps.clear();
    1233          94 :     for (auto &P : LastSU->Preds)
    1234          52 :       if (P.getSUnit() == &I && P.getKind() == SDep::Order)
    1235          10 :         Deps.push_back(P);
    1236          20 :     for (int i = 0, e = Deps.size(); i != e; i++) {
    1237          20 :       Topo.RemovePred(LastSU, Deps[i].getSUnit());
    1238          10 :       LastSU->removePred(Deps[i]);
    1239             :     }
    1240             : 
    1241             :     // Add a dependence between the new instruction and the instruction
    1242             :     // that defines the new base.
    1243          10 :     SDep Dep(&I, SDep::Anti, NewBase);
    1244          10 :     LastSU->addPred(Dep);
    1245             : 
    1246             :     // Remember the base and offset information so that we can update the
    1247             :     // instruction during code generation.
    1248          20 :     InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
    1249             :   }
    1250         114 : }
    1251             : 
    1252             : namespace {
    1253             : 
    1254             : // FuncUnitSorter - Comparison operator used to sort instructions by
    1255             : // the number of functional unit choices.
    1256        5684 : struct FuncUnitSorter {
    1257             :   const InstrItineraryData *InstrItins;
    1258             :   DenseMap<unsigned, unsigned> Resources;
    1259             : 
    1260         114 :   FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
    1261             : 
    1262             :   // Compute the number of functional unit alternatives needed
    1263             :   // at each stage, and take the minimum value. We prioritize the
    1264             :   // instructions by the least number of choices first.
    1265             :   unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
    1266        4186 :     unsigned schedClass = Inst->getDesc().getSchedClass();
    1267             :     unsigned min = UINT_MAX;
    1268        8300 :     for (const InstrStage *IS = InstrItins->beginStage(schedClass),
    1269             :                           *IE = InstrItins->endStage(schedClass);
    1270       10393 :          IS != IE; ++IS) {
    1271        6207 :       unsigned funcUnits = IS->getUnits();
    1272             :       unsigned numAlternatives = countPopulation(funcUnits);
    1273        6207 :       if (numAlternatives < min) {
    1274             :         min = numAlternatives;
    1275        4920 :         F = funcUnits;
    1276             :       }
    1277             :     }
    1278             :     return min;
    1279             :   }
    1280             : 
    1281             :   // Compute the critical resources needed by the instruction. This
    1282             :   // function records the functional units needed by instructions that
    1283             :   // must use only one functional unit. We use this as a tie breaker
    1284             :   // for computing the resource MII. The instrutions that require
    1285             :   // the same, highly used, functional unit have high priority.
    1286         682 :   void calcCriticalResources(MachineInstr &MI) {
    1287         682 :     unsigned SchedClass = MI.getDesc().getSchedClass();
    1288        1589 :     for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
    1289             :                           *IE = InstrItins->endStage(SchedClass);
    1290        1589 :          IS != IE; ++IS) {
    1291         907 :       unsigned FuncUnits = IS->getUnits();
    1292         907 :       if (countPopulation(FuncUnits) == 1)
    1293         394 :         Resources[FuncUnits]++;
    1294             :     }
    1295         682 :   }
    1296             : 
    1297             :   /// Return true if IS1 has less priority than IS2.
    1298        2093 :   bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
    1299        2093 :     unsigned F1 = 0, F2 = 0;
    1300        2093 :     unsigned MFUs1 = minFuncUnits(IS1, F1);
    1301             :     unsigned MFUs2 = minFuncUnits(IS2, F2);
    1302        2093 :     if (MFUs1 == 1 && MFUs2 == 1)
    1303         842 :       return Resources.lookup(F1) < Resources.lookup(F2);
    1304        1672 :     return MFUs1 > MFUs2;
    1305             :   }
    1306             : };
    1307             : 
    1308             : } // end anonymous namespace
    1309             : 
    1310             : /// Calculate the resource constrained minimum initiation interval for the
    1311             : /// specified loop. We use the DFA to model the resources needed for
    1312             : /// each instruction, and we ignore dependences. A different DFA is created
    1313             : /// for each cycle that is required. When adding a new instruction, we attempt
    1314             : /// to add it to each existing DFA, until a legal space is found. If the
    1315             : /// instruction cannot be reserved in an existing DFA, we create a new one.
    1316         114 : unsigned SwingSchedulerDAG::calculateResMII() {
    1317             :   SmallVector<DFAPacketizer *, 8> Resources;
    1318         114 :   MachineBasicBlock *MBB = Loop.getHeader();
    1319         114 :   Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
    1320             : 
    1321             :   // Sort the instructions by the number of available choices for scheduling,
    1322             :   // least to most. Use the number of critical resources as the tie breaker.
    1323             :   FuncUnitSorter FUS =
    1324         114 :       FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
    1325         114 :   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
    1326         114 :                                    E = MBB->getFirstTerminator();
    1327         796 :        I != E; ++I)
    1328         682 :     FUS.calcCriticalResources(*I);
    1329             :   PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
    1330         114 :       FuncUnitOrder(FUS);
    1331             : 
    1332         114 :   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
    1333         114 :                                    E = MBB->getFirstTerminator();
    1334         796 :        I != E; ++I)
    1335         682 :     FuncUnitOrder.push(&*I);
    1336             : 
    1337         796 :   while (!FuncUnitOrder.empty()) {
    1338         682 :     MachineInstr *MI = FuncUnitOrder.top();
    1339         682 :     FuncUnitOrder.pop();
    1340        1364 :     if (TII->isZeroCost(MI->getOpcode()))
    1341          23 :       continue;
    1342             :     // Attempt to reserve the instruction in an existing DFA. At least one
    1343             :     // DFA is needed for each cycle.
    1344         659 :     unsigned NumCycles = getSUnit(MI)->Latency;
    1345             :     unsigned ReservedCycles = 0;
    1346             :     SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
    1347             :     SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
    1348        1977 :     for (unsigned C = 0; C < NumCycles; ++C)
    1349        2050 :       while (RI != RE) {
    1350        1944 :         if ((*RI++)->canReserveResources(*MI)) {
    1351         553 :           ++ReservedCycles;
    1352         553 :           break;
    1353             :         }
    1354             :       }
    1355             :     // Start reserving resources using existing DFAs.
    1356        1765 :     for (unsigned C = 0; C < ReservedCycles; ++C) {
    1357         553 :       --RI;
    1358         553 :       (*RI)->reserveResources(*MI);
    1359             :     }
    1360             :     // Add new DFAs, if needed, to reserve resources.
    1361         871 :     for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
    1362             :       DFAPacketizer *NewResource =
    1363         106 :           TII->CreateTargetScheduleState(MF.getSubtarget());
    1364             :       assert(NewResource->canReserveResources(*MI) && "Reserve error.");
    1365         106 :       NewResource->reserveResources(*MI);
    1366         106 :       Resources.push_back(NewResource);
    1367             :     }
    1368             :   }
    1369             :   int Resmii = Resources.size();
    1370             :   // Delete the memory for each of the DFAs that were created earlier.
    1371         554 :   for (DFAPacketizer *RI : Resources) {
    1372             :     DFAPacketizer *D = RI;
    1373         440 :     delete D;
    1374             :   }
    1375             :   Resources.clear();
    1376         228 :   return Resmii;
    1377             : }
    1378             : 
    1379             : /// Calculate the recurrence-constrainted minimum initiation interval.
    1380             : /// Iterate over each circuit.  Compute the delay(c) and distance(c)
    1381             : /// for each circuit. The II needs to satisfy the inequality
    1382             : /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
    1383             : /// II that satistifies the inequality, and the RecMII is the maximum
    1384             : /// of those values.
    1385             : unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
    1386             :   unsigned RecMII = 0;
    1387             : 
    1388         580 :   for (NodeSet &Nodes : NodeSets) {
    1389         233 :     if (Nodes.empty())
    1390             :       continue;
    1391             : 
    1392         233 :     unsigned Delay = Nodes.size() - 1;
    1393             :     unsigned Distance = 1;
    1394             : 
    1395             :     // ii = ceil(delay / distance)
    1396             :     unsigned CurMII = (Delay + Distance - 1) / Distance;
    1397             :     Nodes.setRecMII(CurMII);
    1398         233 :     if (CurMII > RecMII)
    1399             :       RecMII = CurMII;
    1400             :   }
    1401             : 
    1402             :   return RecMII;
    1403             : }
    1404             : 
    1405             : /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
    1406             : /// but we do this to find the circuits, and then change them back.
    1407         228 : static void swapAntiDependences(std::vector<SUnit> &SUnits) {
    1408             :   SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
    1409        2216 :   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    1410        1760 :     SUnit *SU = &SUnits[i];
    1411        2440 :     for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
    1412        4200 :          IP != EP; ++IP) {
    1413        2440 :       if (IP->getKind() != SDep::Anti)
    1414        2038 :         continue;
    1415         402 :       DepsAdded.push_back(std::make_pair(SU, *IP));
    1416             :     }
    1417             :   }
    1418         402 :   for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
    1419             :                                                           E = DepsAdded.end();
    1420         630 :        I != E; ++I) {
    1421             :     // Remove this anti dependency and add one in the reverse direction.
    1422         402 :     SUnit *SU = I->first;
    1423         402 :     SDep &D = I->second;
    1424             :     SUnit *TargetSU = D.getSUnit();
    1425         402 :     unsigned Reg = D.getReg();
    1426         402 :     unsigned Lat = D.getLatency();
    1427         402 :     SU->removePred(D);
    1428             :     SDep Dep(SU, SDep::Anti, Reg);
    1429             :     Dep.setLatency(Lat);
    1430         402 :     TargetSU->addPred(Dep);
    1431             :   }
    1432         228 : }
    1433             : 
    1434             : /// Create the adjacency structure of the nodes in the graph.
    1435         114 : void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    1436             :     SwingSchedulerDAG *DAG) {
    1437         228 :   BitVector Added(SUnits.size());
    1438        1108 :   for (int i = 0, e = SUnits.size(); i != e; ++i) {
    1439             :     Added.reset();
    1440             :     // Add any successor to the adjacency matrix and exclude duplicates.
    1441        4200 :     for (auto &SI : SUnits[i].Succs) {
    1442             :       // Do not process a boundary node and a back-edge is processed only
    1443             :       // if it goes to a Phi.
    1444        2450 :       if (SI.getSUnit()->isBoundaryNode() ||
    1445         201 :           (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
    1446          10 :         continue;
    1447        1210 :       int N = SI.getSUnit()->NodeNum;
    1448        1210 :       if (!Added.test(N)) {
    1449        1191 :         AdjK[i].push_back(N);
    1450        1191 :         Added.set(N);
    1451             :       }
    1452             :     }
    1453             :     // A chain edge between a store and a load is treated as a back-edge in the
    1454             :     // adjacency matrix.
    1455        4200 :     for (auto &PI : SUnits[i].Preds) {
    1456        3895 :       if (!SUnits[i].getInstr()->mayStore() ||
    1457         534 :           !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
    1458        1188 :         continue;
    1459          64 :       if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
    1460          31 :         int N = PI.getSUnit()->NodeNum;
    1461          31 :         if (!Added.test(N)) {
    1462          31 :           AdjK[i].push_back(N);
    1463          31 :           Added.set(N);
    1464             :         }
    1465             :       }
    1466             :     }
    1467             :   }
    1468         114 : }
    1469             : 
    1470             : /// Identify an elementary circuit in the dependence graph starting at the
    1471             : /// specified node.
    1472        4571 : bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
    1473             :                                           bool HasBackedge) {
    1474        9142 :   SUnit *SV = &SUnits[V];
    1475             :   bool F = false;
    1476        4571 :   Stack.insert(SV);
    1477        4571 :   Blocked.set(V);
    1478             : 
    1479       15931 :   for (auto W : AdjK[V]) {
    1480        5976 :     if (NumPaths > MaxPaths)
    1481             :       break;
    1482        5943 :     if (W < S)
    1483         543 :       continue;
    1484        5400 :     if (W == S) {
    1485         263 :       if (!HasBackedge)
    1486         699 :         NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
    1487             :       F = true;
    1488         263 :       ++NumPaths;
    1489             :       break;
    1490       10274 :     } else if (!Blocked.test(W)) {
    1491        3691 :       if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
    1492             :         F = true;
    1493             :     }
    1494             :   }
    1495             : 
    1496        4308 :   if (F)
    1497         669 :     unblock(V);
    1498             :   else {
    1499       13476 :     for (auto W : AdjK[V]) {
    1500        4787 :       if (W < S)
    1501         508 :         continue;
    1502        8558 :       if (B[W].count(SV) == 0)
    1503        4264 :         B[W].insert(SV);
    1504             :     }
    1505             :   }
    1506             :   Stack.pop_back();
    1507        4571 :   return F;
    1508             : }
    1509             : 
    1510             : /// Unblock a node in the circuit finding algorithm.
    1511         763 : void SwingSchedulerDAG::Circuits::unblock(int U) {
    1512         763 :   Blocked.reset(U);
    1513         763 :   SmallPtrSet<SUnit *, 4> &BU = B[U];
    1514        1035 :   while (!BU.empty()) {
    1515         136 :     SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
    1516             :     assert(SI != BU.end() && "Invalid B set.");
    1517         136 :     SUnit *W = *SI;
    1518             :     BU.erase(W);
    1519         272 :     if (Blocked.test(W->NodeNum))
    1520          94 :       unblock(W->NodeNum);
    1521             :   }
    1522         763 : }
    1523             : 
    1524             : /// Identify all the elementary circuits in the dependence graph using
    1525             : /// Johnson's circuit algorithm.
    1526         114 : void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
    1527             :   // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
    1528             :   // but we do this to find the circuits, and then change them back.
    1529         114 :   swapAntiDependences(SUnits);
    1530             : 
    1531         228 :   Circuits Cir(SUnits);
    1532             :   // Create the adjacency structure.
    1533         114 :   Cir.createAdjacencyStructure(this);
    1534        1108 :   for (int i = 0, e = SUnits.size(); i != e; ++i) {
    1535         880 :     Cir.reset();
    1536         880 :     Cir.circuit(i, i, NodeSets);
    1537             :   }
    1538             : 
    1539             :   // Change the dependences back so that we've created a DAG again.
    1540         114 :   swapAntiDependences(SUnits);
    1541         114 : }
    1542             : 
    1543             : /// Return true for DAG nodes that we ignore when computing the cost functions.
    1544             : /// We ignore the back-edge recurrence in order to avoid unbounded recurison
    1545             : /// in the calculation of the ASAP, ALAP, etc functions.
    1546             : static bool ignoreDependence(const SDep &D, bool isPred) {
    1547             :   if (D.isArtificial())
    1548             :     return true;
    1549        4952 :   return D.getKind() == SDep::Anti && isPred;
    1550             : }
    1551             : 
    1552             : /// Compute several functions need to order the nodes for scheduling.
    1553             : ///  ASAP - Earliest time to schedule a node.
    1554             : ///  ALAP - Latest time to schedule a node.
    1555             : ///  MOV - Mobility function, difference between ALAP and ASAP.
    1556             : ///  D - Depth of each node.
    1557             : ///  H - Height of each node.
    1558         114 : void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
    1559         228 :   ScheduleInfo.resize(SUnits.size());
    1560             : 
    1561             :   DEBUG({
    1562             :     for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
    1563             :                                                     E = Topo.end();
    1564             :          I != E; ++I) {
    1565             :       SUnit *SU = &SUnits[*I];
    1566             :       SU->dump(this);
    1567             :     }
    1568             :   });
    1569             : 
    1570         114 :   int maxASAP = 0;
    1571             :   // Compute ASAP.
    1572             :   for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
    1573             :                                                   E = Topo.end();
    1574         994 :        I != E; ++I) {
    1575         880 :     int asap = 0;
    1576         880 :     SUnit *SU = &SUnits[*I];
    1577        1220 :     for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
    1578             :                                     EP = SU->Preds.end();
    1579        2100 :          IP != EP; ++IP) {
    1580         201 :       if (ignoreDependence(*IP, true))
    1581         201 :         continue;
    1582             :       SUnit *pred = IP->getSUnit();
    1583        3057 :       asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
    1584             :                                   getDistance(pred, SU, *IP) * MII));
    1585             :     }
    1586         880 :     maxASAP = std::max(maxASAP, asap);
    1587        1760 :     ScheduleInfo[*I].ASAP = asap;
    1588             :   }
    1589             : 
    1590             :   // Compute ALAP and MOV.
    1591             :   for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
    1592             :                                                           E = Topo.rend();
    1593         994 :        I != E; ++I) {
    1594         880 :     int alap = maxASAP;
    1595         880 :     SUnit *SU = &SUnits[*I];
    1596        1220 :     for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
    1597             :                                     ES = SU->Succs.end();
    1598        2100 :          IS != ES; ++IS) {
    1599         201 :       if (ignoreDependence(*IS, true))
    1600         201 :         continue;
    1601             :       SUnit *succ = IS->getSUnit();
    1602        3057 :       alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
    1603             :                                   getDistance(SU, succ, *IS) * MII));
    1604             :     }
    1605             : 
    1606        1760 :     ScheduleInfo[*I].ALAP = alap;
    1607             :   }
    1608             : 
    1609             :   // After computing the node functions, compute the summary for each node set.
    1610         530 :   for (NodeSet &I : NodeSets)
    1611         208 :     I.computeNodeSetInfo(this);
    1612             : 
    1613             :   DEBUG({
    1614             :     for (unsigned i = 0; i < SUnits.size(); i++) {
    1615             :       dbgs() << "\tNode " << i << ":\n";
    1616             :       dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n";
    1617             :       dbgs() << "\t   ALAP = " << getALAP(&SUnits[i]) << "\n";
    1618             :       dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";
    1619             :       dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";
    1620             :       dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n";
    1621             :     }
    1622             :   });
    1623         114 : }
    1624             : 
    1625             : /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
    1626             : /// as the predecessors of the elements of NodeOrder that are not also in
    1627             : /// NodeOrder.
    1628         460 : static bool pred_L(SetVector<SUnit *> &NodeOrder,
    1629             :                    SmallSetVector<SUnit *, 8> &Preds,
    1630             :                    const NodeSet *S = nullptr) {
    1631         460 :   Preds.clear();
    1632             :   for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
    1633        2479 :        I != E; ++I) {
    1634        4994 :     for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
    1635        4994 :          PI != PE; ++PI) {
    1636        3438 :       if (S && S->count(PI->getSUnit()) == 0)
    1637         463 :         continue;
    1638         437 :       if (ignoreDependence(*PI, true))
    1639         437 :         continue;
    1640        2075 :       if (NodeOrder.count(PI->getSUnit()) == 0)
    1641         138 :         Preds.insert(PI->getSUnit());
    1642             :     }
    1643             :     // Back-edges are predecessors with an anti-dependence.
    1644        4993 :     for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
    1645             :                                     ES = (*I)->Succs.end();
    1646        4993 :          IS != ES; ++IS) {
    1647        2974 :       if (IS->getKind() != SDep::Anti)
    1648        2484 :         continue;
    1649         591 :       if (S && S->count(IS->getSUnit()) == 0)
    1650         101 :         continue;
    1651         389 :       if (NodeOrder.count(IS->getSUnit()) == 0)
    1652           0 :         Preds.insert(IS->getSUnit());
    1653             :     }
    1654             :   }
    1655         460 :   return !Preds.empty();
    1656             : }
    1657             : 
    1658             : /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
    1659             : /// as the successors of the elements of NodeOrder that are not also in
    1660             : /// NodeOrder.
    1661        1241 : static bool succ_L(SetVector<SUnit *> &NodeOrder,
    1662             :                    SmallSetVector<SUnit *, 8> &Succs,
    1663             :                    const NodeSet *S = nullptr) {
    1664        1241 :   Succs.clear();
    1665             :   for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
    1666        4770 :        I != E; ++I) {
    1667        9646 :     for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
    1668        9646 :          SI != SE; ++SI) {
    1669        6835 :       if (S && S->count(SI->getSUnit()) == 0)
    1670         718 :         continue;
    1671           0 :       if (ignoreDependence(*SI, false))
    1672           0 :         continue;
    1673        5399 :       if (NodeOrder.count(SI->getSUnit()) == 0)
    1674        1256 :         Succs.insert(SI->getSUnit());
    1675             :     }
    1676        9368 :     for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
    1677             :                                     PE = (*I)->Preds.end();
    1678        9368 :          PI != PE; ++PI) {
    1679        5839 :       if (PI->getKind() != SDep::Anti)
    1680        4577 :         continue;
    1681        1372 :       if (S && S->count(PI->getSUnit()) == 0)
    1682         110 :         continue;
    1683        1152 :       if (NodeOrder.count(PI->getSUnit()) == 0)
    1684         155 :         Succs.insert(PI->getSUnit());
    1685             :     }
    1686             :   }
    1687        1241 :   return !Succs.empty();
    1688             : }
    1689             : 
    1690             : /// Return true if there is a path from the specified node to any of the nodes
    1691             : /// in DestNodes. Keep track and return the nodes in any path.
    1692        1919 : static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
    1693             :                         SetVector<SUnit *> &DestNodes,
    1694             :                         SetVector<SUnit *> &Exclude,
    1695             :                         SmallPtrSet<SUnit *, 8> &Visited) {
    1696        1919 :   if (Cur->isBoundaryNode())
    1697             :     return false;
    1698        1919 :   if (Exclude.count(Cur) != 0)
    1699             :     return false;
    1700        3600 :   if (DestNodes.count(Cur) != 0)
    1701             :     return true;
    1702        1641 :   if (!Visited.insert(Cur).second)
    1703         642 :     return Path.count(Cur) != 0;
    1704             :   bool FoundPath = false;
    1705        5554 :   for (auto &SI : Cur->Succs)
    1706        1457 :     FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
    1707        7588 :   for (auto &PI : Cur->Preds)
    1708        2474 :     if (PI.getKind() == SDep::Anti)
    1709          59 :       FoundPath |=
    1710          59 :           computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
    1711        1320 :   if (FoundPath)
    1712         118 :     Path.insert(Cur);
    1713             :   return FoundPath;
    1714             : }
    1715             : 
    1716             : /// Return true if Set1 is a subset of Set2.
    1717         226 : template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
    1718         690 :   for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
    1719         399 :     if (Set2.count(*I) == 0)
    1720             :       return false;
    1721             :   return true;
    1722             : }
    1723             : 
    1724             : /// Compute the live-out registers for the instructions in a node-set.
    1725             : /// The live-out registers are those that are defined in the node-set,
    1726             : /// but not used. Except for use operands of Phis.
    1727          19 : static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
    1728             :                             NodeSet &NS) {
    1729          19 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1730          19 :   MachineRegisterInfo &MRI = MF.getRegInfo();
    1731             :   SmallVector<RegisterMaskPair, 8> LiveOutRegs;
    1732          19 :   SmallSet<unsigned, 4> Uses;
    1733         130 :   for (SUnit *SU : NS) {
    1734         111 :     const MachineInstr *MI = SU->getInstr();
    1735           5 :     if (MI->isPHI())
    1736           5 :       continue;
    1737         886 :     for (const MachineOperand &MO : MI->operands())
    1738         711 :       if (MO.isReg() && MO.isUse()) {
    1739         214 :         unsigned Reg = MO.getReg();
    1740         214 :         if (TargetRegisterInfo::isVirtualRegister(Reg))
    1741         205 :           Uses.insert(Reg);
    1742           9 :         else if (MRI.isAllocatable(Reg))
    1743           0 :           for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    1744           0 :             Uses.insert(*Units);
    1745             :       }
    1746             :   }
    1747         130 :   for (SUnit *SU : NS)
    1748        1052 :     for (const MachineOperand &MO : SU->getInstr()->operands())
    1749         863 :       if (MO.isReg() && MO.isDef() && !MO.isDead()) {
    1750         104 :         unsigned Reg = MO.getReg();
    1751         104 :         if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    1752         104 :           if (!Uses.count(Reg))
    1753          18 :             LiveOutRegs.push_back(RegisterMaskPair(Reg,
    1754             :                                                    LaneBitmask::getNone()));
    1755           0 :         } else if (MRI.isAllocatable(Reg)) {
    1756           0 :           for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    1757           0 :             if (!Uses.count(*Units))
    1758           0 :               LiveOutRegs.push_back(RegisterMaskPair(*Units,
    1759             :                                                      LaneBitmask::getNone()));
    1760             :         }
    1761             :       }
    1762          19 :   RPTracker.addLiveRegs(LiveOutRegs);
    1763          19 : }
    1764             : 
    1765             : /// A heuristic to filter nodes in recurrent node-sets if the register
    1766             : /// pressure of a set is too high.
    1767         114 : void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
    1768         530 :   for (auto &NS : NodeSets) {
    1769             :     // Skip small node-sets since they won't cause register pressure problems.
    1770         208 :     if (NS.size() <= 2)
    1771         189 :       continue;
    1772             :     IntervalPressure RecRegPressure;
    1773          19 :     RegPressureTracker RecRPTracker(RecRegPressure);
    1774          38 :     RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    1775          19 :     computeLiveOuts(MF, RecRPTracker, NS);
    1776          19 :     RecRPTracker.closeBottom();
    1777             : 
    1778             :     std::vector<SUnit *> SUnits(NS.begin(), NS.end());
    1779             :     std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
    1780             :       return A->NodeNum > B->NodeNum;
    1781             :     });
    1782             : 
    1783         127 :     for (auto &SU : SUnits) {
    1784             :       // Since we're computing the register pressure for a subset of the
    1785             :       // instructions in a block, we need to set the tracker for each
    1786             :       // instruction in the node-set. The tracker is set to the instruction
    1787             :       // just after the one we're interested in.
    1788         109 :       MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
    1789             :       RecRPTracker.setPos(std::next(CurInstI));
    1790             : 
    1791         109 :       RegPressureDelta RPDelta;
    1792         109 :       ArrayRef<PressureChange> CriticalPSets;
    1793         109 :       RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
    1794             :                                              CriticalPSets,
    1795             :                                              RecRegPressure.MaxSetPressure);
    1796         109 :       if (RPDelta.Excess.isValid()) {
    1797             :         DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
    1798             :                      << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
    1799             :                      << ":" << RPDelta.Excess.getUnitInc());
    1800           1 :         NS.setExceedPressure(SU);
    1801           1 :         break;
    1802             :       }
    1803         108 :       RecRPTracker.recede();
    1804             :     }
    1805             :   }
    1806         114 : }
    1807             : 
    1808             : /// A heuristic to colocate node sets that have the same set of
    1809             : /// successors.
    1810         114 : void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
    1811             :   unsigned Colocate = 0;
    1812         322 :   for (int i = 0, e = NodeSets.size(); i < e; ++i) {
    1813         208 :     NodeSet &N1 = NodeSets[i];
    1814             :     SmallSetVector<SUnit *, 8> S1;
    1815         416 :     if (N1.empty() || !succ_L(N1, S1))
    1816             :       continue;
    1817         281 :     for (int j = i + 1; j < e; ++j) {
    1818         132 :       NodeSet &N2 = NodeSets[j];
    1819         132 :       if (N1.compareRecMII(N2) != 0)
    1820          63 :         continue;
    1821             :       SmallSetVector<SUnit *, 8> S2;
    1822         225 :       if (N2.empty() || !succ_L(N2, S2))
    1823             :         continue;
    1824          79 :       if (isSubset(S1, S2) && S1.size() == S2.size()) {
    1825           7 :         N1.setColocate(++Colocate);
    1826             :         N2.setColocate(Colocate);
    1827             :         break;
    1828             :       }
    1829             :     }
    1830             :   }
    1831         114 : }
    1832             : 
    1833             : /// Check if the existing node-sets are profitable. If not, then ignore the
    1834             : /// recurrent node-sets, and attempt to schedule all nodes together. This is
    1835             : /// a heuristic. If the MII is large and there is a non-recurrent node with
    1836             : /// a large depth compared to the MII, then it's best to try and schedule
    1837             : /// all instruction together instead of starting with the recurrent node-sets.
    1838         114 : void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
    1839             :   // Look for loops with a large MII.
    1840         114 :   if (MII <= 20)
    1841             :     return;
    1842             :   // Check if the node-set contains only a simple add recurrence.
    1843           0 :   for (auto &NS : NodeSets)
    1844           0 :     if (NS.size() > 2)
    1845             :       return;
    1846             :   // If the depth of any instruction is significantly larger than the MII, then
    1847             :   // ignore the recurrent node-sets and treat all instructions equally.
    1848           0 :   for (auto &SU : SUnits)
    1849           0 :     if (SU.getDepth() > MII * 1.5) {
    1850             :       NodeSets.clear();
    1851             :       DEBUG(dbgs() << "Clear recurrence node-sets\n");
    1852             :       return;
    1853             :     }
    1854             : }
    1855             : 
    1856             : /// Add the nodes that do not belong to a recurrence set into groups
    1857             : /// based upon connected componenets.
    1858         114 : void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
    1859         114 :   SetVector<SUnit *> NodesAdded;
    1860             :   SmallPtrSet<SUnit *, 8> Visited;
    1861             :   // Add the nodes that are on a path between the previous node sets and
    1862             :   // the current node set.
    1863         530 :   for (NodeSet &I : NodeSets) {
    1864             :     SmallSetVector<SUnit *, 8> N;
    1865             :     // Add the nodes from the current node set to the previous node set.
    1866         208 :     if (succ_L(I, N)) {
    1867         156 :       SetVector<SUnit *> Path;
    1868         738 :       for (SUnit *NI : N) {
    1869         291 :         Visited.clear();
    1870         291 :         computePath(NI, Path, NodesAdded, I, Visited);
    1871             :       }
    1872         156 :       if (!Path.empty())
    1873             :         I.insert(Path.begin(), Path.end());
    1874             :     }
    1875             :     // Add the nodes from the previous node set to the current node set.
    1876             :     N.clear();
    1877         208 :     if (succ_L(NodesAdded, N)) {
    1878          51 :       SetVector<SUnit *> Path;
    1879         275 :       for (SUnit *NI : N) {
    1880         112 :         Visited.clear();
    1881         112 :         computePath(NI, Path, I, NodesAdded, Visited);
    1882             :       }
    1883          51 :       if (!Path.empty())
    1884             :         I.insert(Path.begin(), Path.end());
    1885             :     }
    1886         208 :     NodesAdded.insert(I.begin(), I.end());
    1887             :   }
    1888             : 
    1889             :   // Create a new node set with the connected nodes of any successor of a node
    1890             :   // in a recurrent set.
    1891             :   NodeSet NewSet;
    1892             :   SmallSetVector<SUnit *, 8> N;
    1893         114 :   if (succ_L(NodesAdded, N))
    1894         316 :     for (SUnit *I : N)
    1895         127 :       addConnectedNodes(I, NewSet, NodesAdded);
    1896         114 :   if (!NewSet.empty())
    1897          62 :     NodeSets.push_back(NewSet);
    1898             : 
    1899             :   // Create a new node set with the connected nodes of any predecessor of a node
    1900             :   // in a recurrent set.
    1901             :   NewSet.clear();
    1902         114 :   if (pred_L(NodesAdded, N))
    1903           6 :     for (SUnit *I : N)
    1904           2 :       addConnectedNodes(I, NewSet, NodesAdded);
    1905         114 :   if (!NewSet.empty())
    1906           2 :     NodeSets.push_back(NewSet);
    1907             : 
    1908             :   // Create new nodes sets with the connected nodes any any remaining node that
    1909             :   // has no predecessor.
    1910        2868 :   for (unsigned i = 0; i < SUnits.size(); ++i) {
    1911             :     SUnit *SU = &SUnits[i];
    1912         880 :     if (NodesAdded.count(SU) == 0) {
    1913             :       NewSet.clear();
    1914           2 :       addConnectedNodes(SU, NewSet, NodesAdded);
    1915           2 :       if (!NewSet.empty())
    1916           2 :         NodeSets.push_back(NewSet);
    1917             :     }
    1918             :   }
    1919         114 : }
    1920             : 
    1921             : /// Add the node to the set, and add all is its connected nodes to the set.
    1922         409 : void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
    1923             :                                           SetVector<SUnit *> &NodesAdded) {
    1924         409 :   NewSet.insert(SU);
    1925         409 :   NodesAdded.insert(SU);
    1926        1808 :   for (auto &SI : SU->Succs) {
    1927             :     SUnit *Successor = SI.getSUnit();
    1928         495 :     if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
    1929         155 :       addConnectedNodes(Successor, NewSet, NodesAdded);
    1930             :   }
    1931        2012 :   for (auto &PI : SU->Preds) {
    1932             :     SUnit *Predecessor = PI.getSUnit();
    1933         597 :     if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
    1934         123 :       addConnectedNodes(Predecessor, NewSet, NodesAdded);
    1935             :   }
    1936         409 : }
    1937             : 
    1938             : /// Return true if Set1 contains elements in Set2. The elements in common
    1939             : /// are returned in a different container.
    1940         143 : static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
    1941             :                         SmallSetVector<SUnit *, 8> &Result) {
    1942         143 :   Result.clear();
    1943         204 :   for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
    1944         122 :     SUnit *SU = Set1[i];
    1945             :     if (Set2.count(SU) != 0)
    1946           8 :       Result.insert(SU);
    1947             :   }
    1948         143 :   return !Result.empty();
    1949             : }
    1950             : 
    1951             : /// Merge the recurrence node sets that have the same initial node.
    1952         114 : void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
    1953         322 :   for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    1954             :        ++I) {
    1955             :     NodeSet &NI = *I;
    1956         208 :     for (NodeSetType::iterator J = I + 1; J != E;) {
    1957             :       NodeSet &NJ = *J;
    1958         514 :       if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
    1959          50 :         if (NJ.compareRecMII(NI) > 0)
    1960             :           NI.setRecMII(NJ.getRecMII());
    1961         170 :         for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
    1962             :              ++NII)
    1963         145 :           I->insert(*NII);
    1964          25 :         NodeSets.erase(J);
    1965             :         E = NodeSets.end();
    1966             :       } else {
    1967         232 :         ++J;
    1968             :       }
    1969             :     }
    1970             :   }
    1971         114 : }
    1972             : 
    1973             : /// Remove nodes that have been scheduled in previous NodeSets.
    1974         114 : void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
    1975         754 :   for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    1976             :        ++I)
    1977         263 :     for (NodeSetType::iterator J = I + 1; J != E;) {
    1978         893 :       J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
    1979             : 
    1980         209 :       if (J->empty()) {
    1981          11 :         NodeSets.erase(J);
    1982             :         E = NodeSets.end();
    1983             :       } else {
    1984         198 :         ++J;
    1985             :       }
    1986             :     }
    1987         114 : }
    1988             : 
    1989             : /// Return true if Inst1 defines a value that is used in Inst2.
    1990             : static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
    1991        4401 :   for (auto &SI : Inst1->Succs)
    1992        1562 :     if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
    1993             :       return true;
    1994             :   return false;
    1995             : }
    1996             : 
    1997             : /// Compute an ordered list of the dependence graph nodes, which
    1998             : /// indicates the order that the nodes will be scheduled.  This is a
    1999             : /// two-level algorithm. First, a partial order is created, which
    2000             : /// consists of a list of sets ordered from highest to lowest priority.
    2001         114 : void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
    2002             :   SmallSetVector<SUnit *, 8> R;
    2003         114 :   NodeOrder.clear();
    2004             : 
    2005         640 :   for (auto &Nodes : NodeSets) {
    2006             :     DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
    2007             :     OrderKind Order;
    2008             :     SmallSetVector<SUnit *, 8> N;
    2009         263 :     if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
    2010          44 :       R.insert(N.begin(), N.end());
    2011             :       Order = BottomUp;
    2012             :       DEBUG(dbgs() << "  Bottom up (preds) ");
    2013         219 :     } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
    2014          76 :       R.insert(N.begin(), N.end());
    2015             :       Order = TopDown;
    2016             :       DEBUG(dbgs() << "  Top down (succs) ");
    2017         143 :     } else if (isIntersect(N, Nodes, R)) {
    2018             :       // If some of the successors are in the existing node-set, then use the
    2019             :       // top-down ordering.
    2020             :       Order = TopDown;
    2021             :       DEBUG(dbgs() << "  Top down (intersect) ");
    2022         137 :     } else if (NodeSets.size() == 1) {
    2023           2 :       for (auto &N : Nodes)
    2024           2 :         if (N->Succs.size() == 0)
    2025           1 :           R.insert(N);
    2026             :       Order = BottomUp;
    2027             :       DEBUG(dbgs() << "  Bottom up (all) ");
    2028             :     } else {
    2029             :       // Find the node with the highest ASAP.
    2030         136 :       SUnit *maxASAP = nullptr;
    2031         458 :       for (SUnit *SU : Nodes) {
    2032         694 :         if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
    2033         315 :           maxASAP = SU;
    2034             :       }
    2035         136 :       R.insert(maxASAP);
    2036             :       Order = BottomUp;
    2037             :       DEBUG(dbgs() << "  Bottom up (default) ");
    2038             :     }
    2039             : 
    2040         532 :     while (!R.empty()) {
    2041         269 :       if (Order == TopDown) {
    2042             :         // Choose the node with the maximum height.  If more than one, choose
    2043             :         // the node with the lowest MOV. If still more than one, check if there
    2044             :         // is a dependence between the instructions.
    2045         915 :         while (!R.empty()) {
    2046         416 :           SUnit *maxHeight = nullptr;
    2047        4522 :           for (SUnit *I : R) {
    2048        5327 :             if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
    2049         664 :               maxHeight = I;
    2050        2172 :             else if (getHeight(I) == getHeight(maxHeight) &&
    2051        2208 :                      getMOV(I) < getMOV(maxHeight) &&
    2052             :                      !hasDataDependence(maxHeight, I))
    2053          31 :               maxHeight = I;
    2054        2716 :             else if (hasDataDependence(I, maxHeight))
    2055          28 :               maxHeight = I;
    2056             :           }
    2057         416 :           NodeOrder.insert(maxHeight);
    2058             :           DEBUG(dbgs() << maxHeight->NodeNum << " ");
    2059         416 :           R.remove(maxHeight);
    2060        1776 :           for (const auto &I : maxHeight->Succs) {
    2061          29 :             if (Nodes.count(I.getSUnit()) == 0)
    2062          29 :               continue;
    2063         443 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2064          47 :               continue;
    2065           0 :             if (ignoreDependence(I, false))
    2066           0 :               continue;
    2067         396 :             R.insert(I.getSUnit());
    2068             :           }
    2069             :           // Back-edges are predecessors with an anti-dependence.
    2070        2114 :           for (const auto &I : maxHeight->Preds) {
    2071         641 :             if (I.getKind() != SDep::Anti)
    2072         617 :               continue;
    2073           1 :             if (Nodes.count(I.getSUnit()) == 0)
    2074           1 :               continue;
    2075          23 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2076           0 :               continue;
    2077          23 :             R.insert(I.getSUnit());
    2078             :           }
    2079             :         }
    2080             :         Order = BottomUp;
    2081             :         DEBUG(dbgs() << "\n   Switching order to bottom up ");
    2082             :         SmallSetVector<SUnit *, 8> N;
    2083          83 :         if (pred_L(NodeOrder, N, &Nodes))
    2084           5 :           R.insert(N.begin(), N.end());
    2085             :       } else {
    2086             :         // Choose the node with the maximum depth.  If more than one, choose
    2087             :         // the node with the lowest MOV. If there is still more than one, check
    2088             :         // for a dependence between the instructions.
    2089        1114 :         while (!R.empty()) {
    2090         464 :           SUnit *maxDepth = nullptr;
    2091        1548 :           for (SUnit *I : R) {
    2092         698 :             if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
    2093         485 :               maxDepth = I;
    2094          91 :             else if (getDepth(I) == getDepth(maxDepth) &&
    2095          97 :                      getMOV(I) < getMOV(maxDepth) &&
    2096             :                      !hasDataDependence(I, maxDepth))
    2097           4 :               maxDepth = I;
    2098         106 :             else if (hasDataDependence(maxDepth, I))
    2099           9 :               maxDepth = I;
    2100             :           }
    2101         464 :           NodeOrder.insert(maxDepth);
    2102             :           DEBUG(dbgs() << maxDepth->NodeNum << " ");
    2103         464 :           R.remove(maxDepth);
    2104         464 :           if (Nodes.isExceedSU(maxDepth)) {
    2105             :             Order = TopDown;
    2106             :             R.clear();
    2107           0 :             R.insert(Nodes.getNode(0));
    2108           0 :             break;
    2109             :           }
    2110        1622 :           for (const auto &I : maxDepth->Preds) {
    2111         107 :             if (Nodes.count(I.getSUnit()) == 0)
    2112         107 :               continue;
    2113         472 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2114          60 :               continue;
    2115         412 :             if (I.getKind() == SDep::Anti)
    2116         123 :               continue;
    2117         289 :             R.insert(I.getSUnit());
    2118             :           }
    2119             :           // Back-edges are predecessors with an anti-dependence.
    2120        2424 :           for (const auto &I : maxDepth->Succs) {
    2121         748 :             if (I.getKind() != SDep::Anti)
    2122         581 :               continue;
    2123          15 :             if (Nodes.count(I.getSUnit()) == 0)
    2124          15 :               continue;
    2125         152 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2126         123 :               continue;
    2127          29 :             R.insert(I.getSUnit());
    2128             :           }
    2129             :         }
    2130             :         Order = TopDown;
    2131             :         DEBUG(dbgs() << "\n   Switching order to top down ");
    2132             :         SmallSetVector<SUnit *, 8> N;
    2133         186 :         if (succ_L(NodeOrder, N, &Nodes))
    2134           1 :           R.insert(N.begin(), N.end());
    2135             :       }
    2136             :     }
    2137             :     DEBUG(dbgs() << "\nDone with Nodeset\n");
    2138             :   }
    2139             : 
    2140             :   DEBUG({
    2141             :     dbgs() << "Node order: ";
    2142             :     for (SUnit *I : NodeOrder)
    2143             :       dbgs() << " " << I->NodeNum << " ";
    2144             :     dbgs() << "\n";
    2145             :   });
    2146         114 : }
    2147             : 
    2148             : /// Process the nodes in the computed order and create the pipelined schedule
    2149             : /// of the instructions, if possible. Return true if a schedule is found.
    2150         114 : bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
    2151         114 :   if (NodeOrder.empty())
    2152             :     return false;
    2153             : 
    2154             :   bool scheduleFound = false;
    2155             :   // Keep increasing II until a valid schedule is found.
    2156         271 :   for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
    2157         157 :     Schedule.reset();
    2158         157 :     Schedule.setInitiationInterval(II);
    2159             :     DEBUG(dbgs() << "Try to schedule with " << II << "\n");
    2160             : 
    2161             :     SetVector<SUnit *>::iterator NI = NodeOrder.begin();
    2162             :     SetVector<SUnit *>::iterator NE = NodeOrder.end();
    2163             :     do {
    2164        1499 :       SUnit *SU = *NI;
    2165             : 
    2166             :       // Compute the schedule time for the instruction, which is based
    2167             :       // upon the scheduled time for any predecessors/successors.
    2168        1499 :       int EarlyStart = INT_MIN;
    2169        1499 :       int LateStart = INT_MAX;
    2170             :       // These values are set when the size of the schedule window is limited
    2171             :       // due to chain dependences.
    2172        1499 :       int SchedEnd = INT_MAX;
    2173        1499 :       int SchedStart = INT_MIN;
    2174        1499 :       Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
    2175             :                             II, this);
    2176             :       DEBUG({
    2177             :         dbgs() << "Inst (" << SU->NodeNum << ") ";
    2178             :         SU->getInstr()->dump();
    2179             :         dbgs() << "\n";
    2180             :       });
    2181             :       DEBUG({
    2182             :         dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
    2183             :                << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
    2184             :       });
    2185             : 
    2186        2968 :       if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
    2187        1469 :           SchedStart > LateStart)
    2188             :         scheduleFound = false;
    2189        1467 :       else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
    2190        1574 :         SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
    2191         787 :         scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
    2192         680 :       } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
    2193         428 :         SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
    2194         214 :         scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
    2195         466 :       } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
    2196         265 :         SchedEnd =
    2197         795 :             std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
    2198             :         // When scheduling a Phi it is better to start at the late cycle and go
    2199             :         // backwards. The default order may insert the Phi too far away from
    2200             :         // its first dependence.
    2201         265 :         if (SU->getInstr()->isPHI())
    2202         218 :           scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
    2203             :         else
    2204          47 :           scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
    2205             :       } else {
    2206         201 :         int FirstCycle = Schedule.getFirstCycle();
    2207         201 :         scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
    2208         402 :                                         FirstCycle + getASAP(SU) + II - 1, II);
    2209             :       }
    2210             :       // Even if we find a schedule, make sure the schedule doesn't exceed the
    2211             :       // allowable number of stages. We keep trying if this happens.
    2212        1467 :       if (scheduleFound)
    2213        2904 :         if (SwpMaxStages > -1 &&
    2214        2904 :             Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
    2215             :           scheduleFound = false;
    2216             : 
    2217             :       DEBUG({
    2218             :         if (!scheduleFound)
    2219             :           dbgs() << "\tCan't schedule\n";
    2220             :       });
    2221        1499 :     } while (++NI != NE && scheduleFound);
    2222             : 
    2223             :     // If a schedule is found, check if it is a valid schedule too.
    2224         157 :     if (scheduleFound)
    2225         110 :       scheduleFound = Schedule.isValidSchedule(this);
    2226             :   }
    2227             : 
    2228             :   DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
    2229             : 
    2230         114 :   if (scheduleFound)
    2231         110 :     Schedule.finalizeSchedule(this);
    2232             :   else
    2233           4 :     Schedule.reset();
    2234             : 
    2235         224 :   return scheduleFound && Schedule.getMaxStageCount() > 0;
    2236             : }
    2237             : 
    2238             : /// Given a schedule for the loop, generate a new version of the loop,
    2239             : /// and replace the old version.  This function generates a prolog
    2240             : /// that contains the initial iterations in the pipeline, and kernel
    2241             : /// loop, and the epilogue that contains the code for the final
    2242             : /// iterations.
    2243          87 : void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
    2244             :   // Create a new basic block for the kernel and add it to the CFG.
    2245          87 :   MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    2246             : 
    2247          87 :   unsigned MaxStageCount = Schedule.getMaxStageCount();
    2248             : 
    2249             :   // Remember the registers that are used in different stages. The index is
    2250             :   // the iteration, or stage, that the instruction is scheduled in.  This is
    2251             :   // a map between register names in the orignal block and the names created
    2252             :   // in each stage of the pipelined loop.
    2253         459 :   ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
    2254             :   InstrMapTy InstrMap;
    2255             : 
    2256             :   SmallVector<MachineBasicBlock *, 4> PrologBBs;
    2257             :   // Generate the prolog instructions that set up the pipeline.
    2258          87 :   generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
    2259          87 :   MF.insert(BB->getIterator(), KernelBB);
    2260             : 
    2261             :   // Rearrange the instructions to generate the new, pipelined loop,
    2262             :   // and update register names as needed.
    2263         241 :   for (int Cycle = Schedule.getFirstCycle(),
    2264          87 :            LastCycle = Schedule.getFinalCycle();
    2265         241 :        Cycle <= LastCycle; ++Cycle) {
    2266             :     std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
    2267             :     // This inner loop schedules each instruction in the cycle.
    2268         725 :     for (SUnit *CI : CycleInstrs) {
    2269         717 :       if (CI->getInstr()->isPHI())
    2270         146 :         continue;
    2271         425 :       unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
    2272         425 :       MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
    2273         425 :       updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
    2274         425 :       KernelBB->push_back(NewMI);
    2275         425 :       InstrMap[NewMI] = CI->getInstr();
    2276             :     }
    2277             :   }
    2278             : 
    2279             :   // Copy any terminator instructions to the new kernel, and update
    2280             :   // names as needed.
    2281          87 :   for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
    2282          87 :                                    E = BB->instr_end();
    2283         261 :        I != E; ++I) {
    2284         174 :     MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
    2285         174 :     updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
    2286         174 :     KernelBB->push_back(NewMI);
    2287         174 :     InstrMap[NewMI] = &*I;
    2288             :   }
    2289             : 
    2290          87 :   KernelBB->transferSuccessors(BB);
    2291          87 :   KernelBB->replaceSuccessor(BB, KernelBB);
    2292             : 
    2293          87 :   generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
    2294             :                        VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
    2295          87 :   generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
    2296             :                InstrMap, MaxStageCount, MaxStageCount, false);
    2297             : 
    2298             :   DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
    2299             : 
    2300             :   SmallVector<MachineBasicBlock *, 4> EpilogBBs;
    2301             :   // Generate the epilog instructions to complete the pipeline.
    2302          87 :   generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
    2303             :                  PrologBBs);
    2304             : 
    2305             :   // We need this step because the register allocation doesn't handle some
    2306             :   // situations well, so we insert copies to help out.
    2307          87 :   splitLifetimes(KernelBB, EpilogBBs, Schedule);
    2308             : 
    2309             :   // Remove dead instructions due to loop induction variables.
    2310          87 :   removeDeadInstructions(KernelBB, EpilogBBs);
    2311             : 
    2312             :   // Add branches between prolog and epilog blocks.
    2313          87 :   addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
    2314             : 
    2315             :   // Remove the original loop since it's no longer referenced.
    2316          87 :   BB->clear();
    2317          87 :   BB->eraseFromParent();
    2318             : 
    2319         174 :   delete[] VRMap;
    2320          87 : }
    2321             : 
    2322             : /// Generate the pipeline prolog code.
    2323          87 : void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
    2324             :                                        MachineBasicBlock *KernelBB,
    2325             :                                        ValueMapTy *VRMap,
    2326             :                                        MBBVectorTy &PrologBBs) {
    2327         174 :   MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
    2328             :   assert(PreheaderBB != nullptr &&
    2329             :          "Need to add code to handle loops w/o preheader");
    2330             :   MachineBasicBlock *PredBB = PreheaderBB;
    2331             :   InstrMapTy InstrMap;
    2332             : 
    2333             :   // Generate a basic block for each stage, not including the last stage,
    2334             :   // which will be generated in the kernel. Each basic block may contain
    2335             :   // instructions from multiple stages/iterations.
    2336         285 :   for (unsigned i = 0; i < LastStage; ++i) {
    2337             :     // Create and insert the prolog basic block prior to the original loop
    2338             :     // basic block.  The original loop is removed later.
    2339          99 :     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    2340          99 :     PrologBBs.push_back(NewBB);
    2341          99 :     MF.insert(BB->getIterator(), NewBB);
    2342          99 :     NewBB->transferSuccessors(PredBB);
    2343          99 :     PredBB->addSuccessor(NewBB);
    2344          99 :     PredBB = NewBB;
    2345             : 
    2346             :     // Generate instructions for each appropriate stage. Process instructions
    2347             :     // in original program order.
    2348         213 :     for (int StageNum = i; StageNum >= 0; --StageNum) {
    2349         114 :       for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
    2350         114 :                                        BBE = BB->getFirstTerminator();
    2351         943 :            BBI != BBE; ++BBI) {
    2352        1658 :         if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
    2353         132 :           if (BBI->isPHI())
    2354         132 :             continue;
    2355             :           MachineInstr *NewMI =
    2356         338 :               cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
    2357         338 :           updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
    2358             :                             VRMap);
    2359         338 :           NewBB->push_back(NewMI);
    2360         338 :           InstrMap[NewMI] = &*BBI;
    2361             :         }
    2362             :       }
    2363             :     }
    2364          99 :     rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
    2365             :     DEBUG({
    2366             :       dbgs() << "prolog:\n";
    2367             :       NewBB->dump();
    2368             :     });
    2369             :   }
    2370             : 
    2371          87 :   PredBB->replaceSuccessor(BB, KernelBB);
    2372             : 
    2373             :   // Check if we need to remove the branch from the preheader to the original
    2374             :   // loop, and replace it with a branch to the new loop.
    2375          87 :   unsigned numBranches = TII->removeBranch(*PreheaderBB);
    2376          87 :   if (numBranches) {
    2377             :     SmallVector<MachineOperand, 0> Cond;
    2378          48 :     TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
    2379             :   }
    2380          87 : }
    2381             : 
    2382             : /// Generate the pipeline epilog code. The epilog code finishes the iterations
    2383             : /// that were started in either the prolog or the kernel.  We create a basic
    2384             : /// block for each stage that needs to complete.
    2385          87 : void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
    2386             :                                        MachineBasicBlock *KernelBB,
    2387             :                                        ValueMapTy *VRMap,
    2388             :                                        MBBVectorTy &EpilogBBs,
    2389             :                                        MBBVectorTy &PrologBBs) {
    2390             :   // We need to change the branch from the kernel to the first epilog block, so
    2391             :   // this call to analyze branch uses the kernel rather than the original BB.
    2392          87 :   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    2393             :   SmallVector<MachineOperand, 4> Cond;
    2394          87 :   bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
    2395             :   assert(!checkBranch && "generateEpilog must be able to analyze the branch");
    2396          87 :   if (checkBranch)
    2397             :     return;
    2398             : 
    2399             :   MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
    2400          87 :   if (*LoopExitI == KernelBB)
    2401             :     ++LoopExitI;
    2402             :   assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
    2403          87 :   MachineBasicBlock *LoopExitBB = *LoopExitI;
    2404             : 
    2405             :   MachineBasicBlock *PredBB = KernelBB;
    2406             :   MachineBasicBlock *EpilogStart = LoopExitBB;
    2407             :   InstrMapTy InstrMap;
    2408             : 
    2409             :   // Generate a basic block for each stage, not including the last stage,
    2410             :   // which was generated for the kernel.  Each basic block may contain
    2411             :   // instructions from multiple stages/iterations.
    2412          87 :   int EpilogStage = LastStage + 1;
    2413         285 :   for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
    2414          99 :     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
    2415          99 :     EpilogBBs.push_back(NewBB);
    2416          99 :     MF.insert(BB->getIterator(), NewBB);
    2417             : 
    2418          99 :     PredBB->replaceSuccessor(LoopExitBB, NewBB);
    2419          99 :     NewBB->addSuccessor(LoopExitBB);
    2420             : 
    2421          99 :     if (EpilogStart == LoopExitBB)
    2422          87 :       EpilogStart = NewBB;
    2423             : 
    2424             :     // Add instructions to the epilog depending on the current block.
    2425             :     // Process instructions in original program order.
    2426         327 :     for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
    2427        1399 :       for (auto &BBI : *BB) {
    2428         220 :         if (BBI.isPHI())
    2429         220 :           continue;
    2430             :         MachineInstr *In = &BBI;
    2431         837 :         if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
    2432         172 :           MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
    2433         172 :           updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
    2434         172 :           NewBB->push_back(NewMI);
    2435         172 :           InstrMap[NewMI] = In;
    2436             :         }
    2437             :       }
    2438             :     }
    2439         198 :     generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
    2440             :                          VRMap, InstrMap, LastStage, EpilogStage, i == 1);
    2441          99 :     generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
    2442             :                  InstrMap, LastStage, EpilogStage, i == 1);
    2443          99 :     PredBB = NewBB;
    2444             : 
    2445             :     DEBUG({
    2446             :       dbgs() << "epilog:\n";
    2447             :       NewBB->dump();
    2448             :     });
    2449             :   }
    2450             : 
    2451             :   // Fix any Phi nodes in the loop exit block.
    2452         183 :   for (MachineInstr &MI : *LoopExitBB) {
    2453             :     if (!MI.isPHI())
    2454             :       break;
    2455          30 :     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
    2456          21 :       MachineOperand &MO = MI.getOperand(i);
    2457          21 :       if (MO.getMBB() == BB)
    2458             :         MO.setMBB(PredBB);
    2459             :     }
    2460             :   }
    2461             : 
    2462             :   // Create a branch to the new epilog from the kernel.
    2463             :   // Remove the original branch and add a new branch to the epilog.
    2464          87 :   TII->removeBranch(*KernelBB);
    2465         261 :   TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
    2466             :   // Add a branch to the loop exit.
    2467          87 :   if (EpilogBBs.size() > 0) {
    2468          87 :     MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
    2469             :     SmallVector<MachineOperand, 4> Cond1;
    2470         261 :     TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
    2471             :   }
    2472             : }
    2473             : 
    2474             : /// Replace all uses of FromReg that appear outside the specified
    2475             : /// basic block with ToReg.
    2476         358 : static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
    2477             :                                     MachineBasicBlock *MBB,
    2478             :                                     MachineRegisterInfo &MRI,
    2479             :                                     LiveIntervals &LIS) {
    2480         358 :   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
    2481             :                                          E = MRI.use_end();
    2482         897 :        I != E;) {
    2483             :     MachineOperand &O = *I;
    2484             :     ++I;
    2485         539 :     if (O.getParent()->getParent() != MBB)
    2486          29 :       O.setReg(ToReg);
    2487             :   }
    2488             :   if (!LIS.hasInterval(ToReg))
    2489         358 :     LIS.createEmptyInterval(ToReg);
    2490         358 : }
    2491             : 
    2492             : /// Return true if the register has a use that occurs outside the
    2493             : /// specified loop.
    2494         196 : static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
    2495             :                             MachineRegisterInfo &MRI) {
    2496         196 :   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
    2497             :                                          E = MRI.use_end();
    2498         411 :        I != E; ++I)
    2499         215 :     if (I->getParent()->getParent() != BB)
    2500             :       return true;
    2501             :   return false;
    2502             : }
    2503             : 
    2504             : /// Generate Phis for the specific block in the generated pipelined code.
    2505             : /// This function looks at the Phis from the original code to guide the
    2506             : /// creation of new Phis.
    2507         186 : void SwingSchedulerDAG::generateExistingPhis(
    2508             :     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    2509             :     MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    2510             :     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    2511             :     bool IsLast) {
    2512             :   // Compute the stage number for the initial value of the Phi, which
    2513             :   // comes from the prolog. The prolog to use depends on to which kernel/
    2514             :   // epilog that we're adding the Phi.
    2515             :   unsigned PrologStage = 0;
    2516             :   unsigned PrevStage = 0;
    2517         186 :   bool InKernel = (LastStageNum == CurStageNum);
    2518         186 :   if (InKernel) {
    2519          87 :     PrologStage = LastStageNum - 1;
    2520             :     PrevStage = CurStageNum;
    2521             :   } else {
    2522          99 :     PrologStage = LastStageNum - (CurStageNum - LastStageNum);
    2523          99 :     PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
    2524             :   }
    2525             : 
    2526         186 :   for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
    2527         186 :                                    BBE = BB->getFirstNonPHI();
    2528         512 :        BBI != BBE; ++BBI) {
    2529         326 :     unsigned Def = BBI->getOperand(0).getReg();
    2530             : 
    2531             :     unsigned InitVal = 0;
    2532         326 :     unsigned LoopVal = 0;
    2533         326 :     getPhiRegs(*BBI, BB, InitVal, LoopVal);
    2534             : 
    2535         326 :     unsigned PhiOp1 = 0;
    2536             :     // The Phi value from the loop body typically is defined in the loop, but
    2537             :     // not always. So, we need to check if the value is defined in the loop.
    2538         326 :     unsigned PhiOp2 = LoopVal;
    2539         326 :     if (VRMap[LastStageNum].count(LoopVal))
    2540         323 :       PhiOp2 = VRMap[LastStageNum][LoopVal];
    2541             : 
    2542         326 :     int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
    2543             :     int LoopValStage =
    2544         652 :         Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
    2545         326 :     unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
    2546         326 :     if (NumStages == 0) {
    2547             :       // We don't need to generate a Phi anymore, but we need to rename any uses
    2548             :       // of the Phi value.
    2549           4 :       unsigned NewReg = VRMap[PrevStage][LoopVal];
    2550           4 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
    2551             :                             Def, NewReg);
    2552           2 :       if (VRMap[CurStageNum].count(LoopVal))
    2553           2 :         VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
    2554             :     }
    2555             :     // Adjust the number of Phis needed depending on the number of prologs left,
    2556             :     // and the distance from where the Phi is first scheduled.
    2557         326 :     unsigned NumPhis = NumStages;
    2558         326 :     if (!InKernel && (int)PrologStage < LoopValStage)
    2559             :       // The NumPhis is the maximum number of new Phis needed during the steady
    2560             :       // state. If the Phi has not been scheduled in current prolog, then we
    2561             :       // need to generate less Phis.
    2562         100 :       NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
    2563             :     // The number of Phis cannot exceed the number of prolog stages. Each
    2564             :     // stage can potentially define two values.
    2565         652 :     NumPhis = std::min(NumPhis, PrologStage + 2);
    2566             : 
    2567             :     unsigned NewReg = 0;
    2568             : 
    2569         326 :     unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
    2570             :     // In the epilog, we may need to look back one stage to get the correct
    2571             :     // Phi name because the epilog and prolog blocks execute the same stage.
    2572             :     // The correct name is from the previous block only when the Phi has
    2573             :     // been completely scheduled prior to the epilog, and Phi value is not
    2574             :     // needed in multiple stages.
    2575             :     int StageDiff = 0;
    2576         326 :     if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
    2577             :         NumPhis == 1)
    2578             :       StageDiff = 1;
    2579             :     // Adjust the computations below when the phi and the loop definition
    2580             :     // are scheduled in different stages.
    2581         326 :     if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
    2582           0 :       StageDiff = StageScheduled - LoopValStage;
    2583        1012 :     for (unsigned np = 0; np < NumPhis; ++np) {
    2584             :       // If the Phi hasn't been scheduled, then use the initial Phi operand
    2585             :       // value. Otherwise, use the scheduled version of the instruction. This
    2586             :       // is a little complicated when a Phi references another Phi.
    2587         343 :       if (np > PrologStage || StageScheduled >= (int)LastStageNum)
    2588         101 :         PhiOp1 = InitVal;
    2589             :       // Check if the Phi has already been scheduled in a prolog stage.
    2590         242 :       else if (PrologStage >= AccessStage + StageDiff + np &&
    2591         142 :                VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
    2592         132 :         PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
    2593             :       // Check if the Phi has already been scheduled, but the loop intruction
    2594             :       // is either another Phi, or doesn't occur in the loop.
    2595         110 :       else if (PrologStage >= AccessStage + StageDiff + np) {
    2596             :         // If the Phi references another Phi, we need to examine the other
    2597             :         // Phi to get the correct value.
    2598          10 :         PhiOp1 = LoopVal;
    2599          10 :         MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
    2600             :         int Indirects = 1;
    2601          28 :         while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
    2602           8 :           int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
    2603           8 :           if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
    2604           8 :             PhiOp1 = getInitPhiReg(*InstOp1, BB);
    2605             :           else
    2606           8 :             PhiOp1 = getLoopPhiReg(*InstOp1, BB);
    2607           8 :           InstOp1 = MRI.getVRegDef(PhiOp1);
    2608           8 :           int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
    2609           8 :           int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
    2610           8 :           if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
    2611           4 :               VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
    2612           4 :             PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
    2613           4 :             break;
    2614             :           }
    2615           4 :           ++Indirects;
    2616             :         }
    2617             :       } else
    2618         100 :         PhiOp1 = InitVal;
    2619             :       // If this references a generated Phi in the kernel, get the Phi operand
    2620             :       // from the incoming block.
    2621         343 :       if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
    2622          14 :         if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
    2623           1 :           PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
    2624             : 
    2625         343 :       MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
    2626         343 :       bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
    2627             :       // In the epilog, a map lookup is needed to get the value from the kernel,
    2628             :       // or previous epilog block. How is does this depends on if the
    2629             :       // instruction is scheduled in the previous block.
    2630         343 :       if (!InKernel) {
    2631             :         int StageDiffAdj = 0;
    2632         190 :         if (LoopValStage != -1 && StageScheduled > LoopValStage)
    2633           0 :           StageDiffAdj = StageScheduled - LoopValStage;
    2634             :         // Use the loop value defined in the kernel, unless the kernel
    2635             :         // contains the last definition of the Phi.
    2636         336 :         if (np == 0 && PrevStage == LastStageNum &&
    2637         190 :             (StageScheduled != 0 || LoopValStage != 0) &&
    2638          41 :             VRMap[PrevStage - StageDiffAdj].count(LoopVal))
    2639          40 :           PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
    2640             :         // Use the value defined by the Phi. We add one because we switch
    2641             :         // from looking at the loop value to the Phi definition.
    2642         150 :         else if (np > 0 && PrevStage == LastStageNum &&
    2643           9 :                  VRMap[PrevStage - np + 1].count(Def))
    2644           9 :           PhiOp2 = VRMap[PrevStage - np + 1][Def];
    2645             :         // Use the loop value defined in the kernel.
    2646         141 :         else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
    2647          12 :                  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
    2648          10 :           PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
    2649             :         // Use the value defined by the Phi, unless we're generating the first
    2650             :         // epilog and the Phi refers to a Phi in a different stage.
    2651         262 :         else if (VRMap[PrevStage - np].count(Def) &&
    2652         131 :                  (!LoopDefIsPhi || PrevStage != LastStageNum))
    2653         126 :           PhiOp2 = VRMap[PrevStage - np][Def];
    2654             :       }
    2655             : 
    2656             :       // Check if we can reuse an existing Phi. This occurs when a Phi
    2657             :       // references another Phi, and the other Phi is scheduled in an
    2658             :       // earlier stage. We can try to reuse an existing Phi up until the last
    2659             :       // stage of the current Phi.
    2660         343 :       if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
    2661          30 :         int LVNumStages = Schedule.getStagesForPhi(LoopVal);
    2662          15 :         int StageDiff = (StageScheduled - LoopValStage);
    2663          15 :         LVNumStages -= StageDiff;
    2664          15 :         if (LVNumStages > (int)np) {
    2665             :           NewReg = PhiOp2;
    2666             :           unsigned ReuseStage = CurStageNum;
    2667           0 :           if (Schedule.isLoopCarried(this, *PhiInst))
    2668           0 :             ReuseStage -= LVNumStages;
    2669             :           // Check if the Phi to reuse has been generated yet. If not, then
    2670             :           // there is nothing to reuse.
    2671           0 :           if (VRMap[ReuseStage].count(LoopVal)) {
    2672           0 :             NewReg = VRMap[ReuseStage][LoopVal];
    2673             : 
    2674           0 :             rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2675             :                                   &*BBI, Def, NewReg);
    2676             :             // Update the map with the new Phi name.
    2677           0 :             VRMap[CurStageNum - np][Def] = NewReg;
    2678             :             PhiOp2 = NewReg;
    2679           0 :             if (VRMap[LastStageNum - np - 1].count(LoopVal))
    2680           0 :               PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
    2681             : 
    2682           0 :             if (IsLast && np == NumPhis - 1)
    2683           0 :               replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2684           0 :             continue;
    2685             :           }
    2686          15 :         } else if (InKernel && StageDiff > 0 &&
    2687           1 :                    VRMap[CurStageNum - StageDiff - np].count(LoopVal))
    2688           0 :           PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
    2689             :       }
    2690             : 
    2691         343 :       const TargetRegisterClass *RC = MRI.getRegClass(Def);
    2692         343 :       NewReg = MRI.createVirtualRegister(RC);
    2693             : 
    2694             :       MachineInstrBuilder NewPhi =
    2695         686 :           BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
    2696         686 :                   TII->get(TargetOpcode::PHI), NewReg);
    2697         343 :       NewPhi.addReg(PhiOp1).addMBB(BB1);
    2698         343 :       NewPhi.addReg(PhiOp2).addMBB(BB2);
    2699         343 :       if (np == 0)
    2700         648 :         InstrMap[NewPhi] = &*BBI;
    2701             : 
    2702             :       // We define the Phis after creating the new pipelined code, so
    2703             :       // we need to rename the Phi values in scheduled instructions.
    2704             : 
    2705             :       unsigned PrevReg = 0;
    2706         343 :       if (InKernel && VRMap[PrevStage - np].count(LoopVal))
    2707         152 :         PrevReg = VRMap[PrevStage - np][LoopVal];
    2708         686 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
    2709             :                             Def, NewReg, PrevReg);
    2710             :       // If the Phi has been scheduled, use the new name for rewriting.
    2711         343 :       if (VRMap[CurStageNum - np].count(Def)) {
    2712          10 :         unsigned R = VRMap[CurStageNum - np][Def];
    2713          10 :         rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
    2714             :                               R, NewReg);
    2715             :       }
    2716             : 
    2717             :       // Check if we need to rename any uses that occurs after the loop. The
    2718             :       // register to replace depends on whether the Phi is scheduled in the
    2719             :       // epilog.
    2720         343 :       if (IsLast && np == NumPhis - 1)
    2721         146 :         replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2722             : 
    2723             :       // In the kernel, a dependent Phi uses the value from this Phi.
    2724         343 :       if (InKernel)
    2725             :         PhiOp2 = NewReg;
    2726             : 
    2727             :       // Update the map with the new Phi name.
    2728         343 :       VRMap[CurStageNum - np][Def] = NewReg;
    2729             :     }
    2730             : 
    2731           0 :     while (NumPhis++ < NumStages) {
    2732           0 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
    2733             :                             &*BBI, Def, NewReg, 0);
    2734             :     }
    2735             : 
    2736             :     // Check if we need to rename a Phi that has been eliminated due to
    2737             :     // scheduling.
    2738         326 :     if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
    2739           0 :       replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
    2740             :   }
    2741         186 : }
    2742             : 
    2743             : /// Generate Phis for the specified block in the generated pipelined code.
    2744             : /// These are new Phis needed because the definition is scheduled after the
    2745             : /// use in the pipelened sequence.
    2746         186 : void SwingSchedulerDAG::generatePhis(
    2747             :     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    2748             :     MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    2749             :     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    2750             :     bool IsLast) {
    2751             :   // Compute the stage number that contains the initial Phi value, and
    2752             :   // the Phi from the previous stage.
    2753             :   unsigned PrologStage = 0;
    2754             :   unsigned PrevStage = 0;
    2755         186 :   unsigned StageDiff = CurStageNum - LastStageNum;
    2756             :   bool InKernel = (StageDiff == 0);
    2757         186 :   if (InKernel) {
    2758          87 :     PrologStage = LastStageNum - 1;
    2759             :     PrevStage = CurStageNum;
    2760             :   } else {
    2761          99 :     PrologStage = LastStageNum - StageDiff;
    2762          99 :     PrevStage = LastStageNum + StageDiff - 1;
    2763             :   }
    2764             : 
    2765         186 :   for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
    2766         186 :                                    BBE = BB->instr_end();
    2767        1493 :        BBI != BBE; ++BBI) {
    2768        5772 :     for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
    2769        4465 :       MachineOperand &MO = BBI->getOperand(i);
    2770        9288 :       if (!MO.isReg() || !MO.isDef() ||
    2771        1459 :           !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    2772        7310 :         continue;
    2773             : 
    2774         866 :       int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
    2775             :       assert(StageScheduled != -1 && "Expecting scheduled instruction.");
    2776         866 :       unsigned Def = MO.getReg();
    2777         866 :       unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
    2778             :       // An instruction scheduled in stage 0 and is used after the loop
    2779             :       // requires a phi in the epilog for the last definition from either
    2780             :       // the kernel or prolog.
    2781        1062 :       if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
    2782         196 :           hasUseAfterLoop(Def, BB, MRI))
    2783             :         NumPhis = 1;
    2784         866 :       if (!InKernel && (unsigned)StageScheduled > PrologStage)
    2785         112 :         continue;
    2786             : 
    2787        1508 :       unsigned PhiOp2 = VRMap[PrevStage][Def];
    2788         754 :       if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
    2789          27 :         if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
    2790             :           PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
    2791             :       // The number of Phis can't exceed the number of prolog stages. The
    2792             :       // prolog stage number is zero based.
    2793         754 :       if (NumPhis > PrologStage + 1 - StageScheduled)
    2794             :         NumPhis = PrologStage + 1 - StageScheduled;
    2795        1334 :       for (unsigned np = 0; np < NumPhis; ++np) {
    2796         580 :         unsigned PhiOp1 = VRMap[PrologStage][Def];
    2797         290 :         if (np <= PrologStage)
    2798         580 :           PhiOp1 = VRMap[PrologStage - np][Def];
    2799         290 :         if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
    2800         134 :           if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
    2801             :             PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
    2802         134 :           if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
    2803             :             PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
    2804             :         }
    2805         290 :         if (!InKernel)
    2806         318 :           PhiOp2 = VRMap[PrevStage - np][Def];
    2807             : 
    2808         290 :         const TargetRegisterClass *RC = MRI.getRegClass(Def);
    2809         290 :         unsigned NewReg = MRI.createVirtualRegister(RC);
    2810             : 
    2811             :         MachineInstrBuilder NewPhi =
    2812         580 :             BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
    2813         580 :                     TII->get(TargetOpcode::PHI), NewReg);
    2814         290 :         NewPhi.addReg(PhiOp1).addMBB(BB1);
    2815         290 :         NewPhi.addReg(PhiOp2).addMBB(BB2);
    2816         290 :         if (np == 0)
    2817         566 :           InstrMap[NewPhi] = &*BBI;
    2818             : 
    2819             :         // Rewrite uses and update the map. The actions depend upon whether
    2820             :         // we generating code for the kernel or epilog blocks.
    2821         290 :         if (InKernel) {
    2822         131 :           rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2823             :                                 &*BBI, PhiOp1, NewReg);
    2824         131 :           rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2825             :                                 &*BBI, PhiOp2, NewReg);
    2826             : 
    2827             :           PhiOp2 = NewReg;
    2828         262 :           VRMap[PrevStage - np - 1][Def] = NewReg;
    2829             :         } else {
    2830         318 :           VRMap[CurStageNum - np][Def] = NewReg;
    2831         159 :           if (np == NumPhis - 1)
    2832         310 :             rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2833             :                                   &*BBI, Def, NewReg);
    2834             :         }
    2835         290 :         if (IsLast && np == NumPhis - 1)
    2836         116 :           replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2837             :       }
    2838             :     }
    2839             :   }
    2840         186 : }
    2841             : 
    2842             : /// Remove instructions that generate values with no uses.
    2843             : /// Typically, these are induction variable operations that generate values
    2844             : /// used in the loop itself.  A dead instruction has a definition with
    2845             : /// no uses, or uses that occur in the original loop only.
    2846          87 : void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
    2847             :                                                MBBVectorTy &EpilogBBs) {
    2848             :   // For each epilog block, check that the value defined by each instruction
    2849             :   // is used.  If not, delete it.
    2850          87 :   for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
    2851          87 :                                      MBE = EpilogBBs.rend();
    2852         186 :        MBB != MBE; ++MBB)
    2853          99 :     for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
    2854             :                                                    ME = (*MBB)->instr_rend();
    2855         707 :          MI != ME;) {
    2856             :       // From DeadMachineInstructionElem. Don't delete inline assembly.
    2857         609 :       if (MI->isInlineAsm()) {
    2858             :         ++MI;
    2859         303 :         continue;
    2860             :       }
    2861         607 :       bool SawStore = false;
    2862             :       // Check if it's safe to remove the instruction due to side effects.
    2863             :       // We can, and want to, remove Phis here.
    2864         770 :       if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
    2865             :         ++MI;
    2866             :         continue;
    2867             :       }
    2868             :       bool used = true;
    2869        1133 :       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
    2870         444 :                                       MOE = MI->operands_end();
    2871        1133 :            MOI != MOE; ++MOI) {
    2872        2265 :         if (!MOI->isReg() || !MOI->isDef())
    2873             :           continue;
    2874         444 :         unsigned reg = MOI->getReg();
    2875             :         unsigned realUses = 0;
    2876         444 :         for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
    2877             :                                                EI = MRI.use_end();
    2878         444 :              UI != EI; ++UI) {
    2879             :           // Check if there are any uses that occur only in the original
    2880             :           // loop.  If so, that's not a real use.
    2881         305 :           if (UI->getParent()->getParent() != BB) {
    2882             :             realUses++;
    2883             :             used = true;
    2884             :             break;
    2885             :           }
    2886             :         }
    2887         444 :         if (realUses > 0)
    2888             :           break;
    2889             :         used = false;
    2890             :       }
    2891         583 :       if (!used) {
    2892         139 :         MI++->eraseFromParent();
    2893             :         continue;
    2894             :       }
    2895             :       ++MI;
    2896             :     }
    2897             :   // In the kernel block, check if we can remove a Phi that generates a value
    2898             :   // used in an instruction removed in the epilog block.
    2899             :   for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
    2900          87 :                                    BBE = KernelBB->getFirstNonPHI();
    2901         371 :        BBI != BBE;) {
    2902             :     MachineInstr *MI = &*BBI;
    2903             :     ++BBI;
    2904         284 :     unsigned reg = MI->getOperand(0).getReg();
    2905         568 :     if (MRI.use_begin(reg) == MRI.use_end()) {
    2906           0 :       MI->eraseFromParent();
    2907             :     }
    2908             :   }
    2909          87 : }
    2910             : 
    2911             : /// For loop carried definitions, we split the lifetime of a virtual register
    2912             : /// that has uses past the definition in the next iteration. A copy with a new
    2913             : /// virtual register is inserted before the definition, which helps with
    2914             : /// generating a better register assignment.
    2915             : ///
    2916             : ///   v1 = phi(a, v2)     v1 = phi(a, v2)
    2917             : ///   v2 = phi(b, v3)     v2 = phi(b, v3)
    2918             : ///   v3 = ..             v4 = copy v1
    2919             : ///   .. = V1             v3 = ..
    2920             : ///                       .. = v4
    2921          87 : void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
    2922             :                                        MBBVectorTy &EpilogBBs,
    2923             :                                        SMSchedule &Schedule) {
    2924          87 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    2925         458 :   for (auto &PHI : KernelBB->phis()) {
    2926         284 :     unsigned Def = PHI.getOperand(0).getReg();
    2927             :     // Check for any Phi definition that used as an operand of another Phi
    2928             :     // in the same block.
    2929         792 :     for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
    2930             :                                                  E = MRI.use_instr_end();
    2931         792 :          I != E; ++I) {
    2932         115 :       if (I->isPHI() && I->getParent() == KernelBB) {
    2933             :         // Get the loop carried definition.
    2934             :         unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
    2935          16 :         if (!LCDef)
    2936             :           continue;
    2937          16 :         MachineInstr *MI = MRI.getVRegDef(LCDef);
    2938          16 :         if (!MI || MI->getParent() != KernelBB || MI->isPHI())
    2939             :           continue;
    2940             :         // Search through the rest of the block looking for uses of the Phi
    2941             :         // definition. If one occurs, then split the lifetime.
    2942             :         unsigned SplitReg = 0;
    2943             :         for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
    2944         136 :                                     KernelBB->instr_end()))
    2945         120 :           if (BBJ.readsRegister(Def)) {
    2946             :             // We split the lifetime when we find the first use.
    2947          15 :             if (SplitReg == 0) {
    2948          18 :               SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
    2949          18 :               BuildMI(*KernelBB, MI, MI->getDebugLoc(),
    2950           9 :                       TII->get(TargetOpcode::COPY), SplitReg)
    2951           9 :                   .addReg(Def);
    2952             :             }
    2953          15 :             BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
    2954             :           }
    2955          16 :         if (!SplitReg)
    2956             :           continue;
    2957             :         // Search through each of the epilog blocks for any uses to be renamed.
    2958          29 :         for (auto &Epilog : EpilogBBs)
    2959         135 :           for (auto &I : *Epilog)
    2960         105 :             if (I.readsRegister(Def))
    2961          18 :               I.substituteRegister(Def, SplitReg, 0, *TRI);
    2962             :         break;
    2963             :       }
    2964             :     }
    2965             :   }
    2966          87 : }
    2967             : 
    2968             : /// Remove the incoming block from the Phis in a basic block.
    2969          18 : static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
    2970          69 :   for (MachineInstr &MI : *BB) {
    2971             :     if (!MI.isPHI())
    2972             :       break;
    2973          35 :     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
    2974          70 :       if (MI.getOperand(i + 1).getMBB() == Incoming) {
    2975          33 :         MI.RemoveOperand(i + 1);
    2976          33 :         MI.RemoveOperand(i);
    2977          33 :         break;
    2978             :       }
    2979             :   }
    2980          18 : }
    2981             : 
    2982             : /// Create branches from each prolog basic block to the appropriate epilog
    2983             : /// block.  These edges are needed if the loop ends before reaching the
    2984             : /// kernel.
    2985          87 : void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
    2986             :                                     MachineBasicBlock *KernelBB,
    2987             :                                     MBBVectorTy &EpilogBBs,
    2988             :                                     SMSchedule &Schedule, ValueMapTy *VRMap) {
    2989             :   assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
    2990          87 :   MachineInstr *IndVar = Pass.LI.LoopInductionVar;
    2991          87 :   MachineInstr *Cmp = Pass.LI.LoopCompare;
    2992             :   MachineBasicBlock *LastPro = KernelBB;
    2993             :   MachineBasicBlock *LastEpi = KernelBB;
    2994             : 
    2995             :   // Start from the blocks connected to the kernel and work "out"
    2996             :   // to the first prolog and the last epilog blocks.
    2997             :   SmallVector<MachineInstr *, 4> PrevInsts;
    2998          87 :   unsigned MaxIter = PrologBBs.size() - 1;
    2999             :   unsigned LC = UINT_MAX;
    3000             :   unsigned LCMin = UINT_MAX;
    3001         285 :   for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
    3002             :     // Add branches to the prolog that go to the corresponding
    3003             :     // epilog, and the fall-thru prolog/kernel block.
    3004         198 :     MachineBasicBlock *Prolog = PrologBBs[j];
    3005         198 :     MachineBasicBlock *Epilog = EpilogBBs[i];
    3006             :     // We've executed one iteration, so decrement the loop count and check for
    3007             :     // the loop end.
    3008             :     SmallVector<MachineOperand, 4> Cond;
    3009             :     // Check if the LOOP0 has already been removed. If so, then there is no need
    3010             :     // to reduce the trip count.
    3011          99 :     if (LC != 0)
    3012         198 :       LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
    3013          99 :                                 MaxIter);
    3014             : 
    3015             :     // Record the value of the first trip count, which is used to determine if
    3016             :     // branches and blocks can be removed for constant trip counts.
    3017          99 :     if (LCMin == UINT_MAX)
    3018             :       LCMin = LC;
    3019             : 
    3020             :     unsigned numAdded = 0;
    3021          99 :     if (TargetRegisterInfo::isVirtualRegister(LC)) {
    3022          81 :       Prolog->addSuccessor(Epilog);
    3023         243 :       numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
    3024          18 :     } else if (j >= LCMin) {
    3025           1 :       Prolog->addSuccessor(Epilog);
    3026           1 :       Prolog->removeSuccessor(LastPro);
    3027           1 :       LastEpi->removeSuccessor(Epilog);
    3028           3 :       numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
    3029           1 :       removePhis(Epilog, LastEpi);
    3030             :       // Remove the blocks that are no longer referenced.
    3031           1 :       if (LastPro != LastEpi) {
    3032             :         LastEpi->clear();
    3033           0 :         LastEpi->eraseFromParent();
    3034             :       }
    3035             :       LastPro->clear();
    3036           1 :       LastPro->eraseFromParent();
    3037             :     } else {
    3038          51 :       numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
    3039          17 :       removePhis(Epilog, Prolog);
    3040             :     }
    3041             :     LastPro = Prolog;
    3042             :     LastEpi = Epilog;
    3043         180 :     for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
    3044             :                                                    E = Prolog->instr_rend();
    3045         279 :          I != E && numAdded > 0; ++I, --numAdded)
    3046         180 :       updateInstruction(&*I, false, j, 0, Schedule, VRMap);
    3047             :   }
    3048          87 : }
    3049             : 
    3050             : /// Return true if we can compute the amount the instruction changes
    3051             : /// during each iteration. Set Delta to the amount of the change.
    3052         464 : bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
    3053         464 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    3054             :   unsigned BaseReg;
    3055             :   int64_t Offset;
    3056         464 :   if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
    3057             :     return false;
    3058             : 
    3059         422 :   MachineRegisterInfo &MRI = MF.getRegInfo();
    3060             :   // Check if there is a Phi. If so, get the definition in the loop.
    3061         422 :   MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
    3062         422 :   if (BaseDef && BaseDef->isPHI()) {
    3063         638 :     BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
    3064         319 :     BaseDef = MRI.getVRegDef(BaseReg);
    3065             :   }
    3066         422 :   if (!BaseDef)
    3067             :     return false;
    3068             : 
    3069         415 :   int D = 0;
    3070         415 :   if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
    3071             :     return false;
    3072             : 
    3073         310 :   Delta = D;
    3074             :   return true;
    3075             : }
    3076             : 
    3077             : /// Update the memory operand with a new offset when the pipeliner
    3078             : /// generates a new copy of the instruction that refers to a
    3079             : /// different memory location.
    3080         935 : void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
    3081             :                                           MachineInstr &OldMI, unsigned Num) {
    3082         935 :   if (Num == 0)
    3083             :     return;
    3084             :   // If the instruction has memory operands, then adjust the offset
    3085             :   // when the instruction appears in different stages.
    3086        1020 :   unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
    3087         510 :   if (NumRefs == 0)
    3088             :     return;
    3089         192 :   MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
    3090             :   unsigned Refs = 0;
    3091         648 :   for (MachineMemOperand *MMO : NewMI.memoperands()) {
    3092         912 :     if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
    3093             :         (!MMO->getValue())) {
    3094           8 :       NewMemRefs[Refs++] = MMO;
    3095           8 :       continue;
    3096             :     }
    3097             :     unsigned Delta;
    3098         220 :     if (computeDelta(OldMI, Delta)) {
    3099          96 :       int64_t AdjOffset = Delta * Num;
    3100         192 :       NewMemRefs[Refs++] =
    3101         192 :           MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
    3102             :     } else
    3103         124 :       NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
    3104             :   }
    3105         192 :   NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
    3106             : }
    3107             : 
    3108             : /// Clone the instruction for the new pipelined loop and update the
    3109             : /// memory operands, if needed.
    3110         597 : MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
    3111             :                                             unsigned CurStageNum,
    3112             :                                             unsigned InstStageNum) {
    3113         597 :   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
    3114             :   // Check for tied operands in inline asm instructions. This should be handled
    3115             :   // elsewhere, but I'm not sure of the best solution.
    3116         597 :   if (OldMI->isInlineAsm())
    3117           8 :     for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
    3118           8 :       const auto &MO = OldMI->getOperand(i);
    3119          10 :       if (MO.isReg() && MO.isUse())
    3120             :         break;
    3121             :       unsigned UseIdx;
    3122           6 :       if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
    3123           0 :         NewMI->tieOperands(i, UseIdx);
    3124             :     }
    3125         597 :   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
    3126         597 :   return NewMI;
    3127             : }
    3128             : 
    3129             : /// Clone the instruction for the new pipelined loop. If needed, this
    3130             : /// function updates the instruction using the values saved in the
    3131             : /// InstrChanges structure.
    3132         338 : MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
    3133             :                                                      unsigned CurStageNum,
    3134             :                                                      unsigned InstStageNum,
    3135             :                                                      SMSchedule &Schedule) {
    3136         338 :   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
    3137             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
    3138         676 :       InstrChanges.find(getSUnit(OldMI));
    3139         338 :   if (It != InstrChanges.end()) {
    3140          10 :     std::pair<unsigned, int64_t> RegAndOffset = It->second;
    3141             :     unsigned BasePos, OffsetPos;
    3142          10 :     if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
    3143           0 :       return nullptr;
    3144          20 :     int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
    3145          10 :     MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
    3146          10 :     if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
    3147          10 :       NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
    3148          10 :     NewMI->getOperand(OffsetPos).setImm(NewOffset);
    3149             :   }
    3150         338 :   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
    3151         338 :   return NewMI;
    3152             : }
    3153             : 
    3154             : /// Update the machine instruction with new virtual registers.  This
    3155             : /// function may change the defintions and/or uses.
    3156        1289 : void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
    3157             :                                           unsigned CurStageNum,
    3158             :                                           unsigned InstrStageNum,
    3159             :                                           SMSchedule &Schedule,
    3160             :                                           ValueMapTy *VRMap) {
    3161        5502 :   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
    3162        4213 :     MachineOperand &MO = NewMI->getOperand(i);
    3163        7343 :     if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3164        1749 :       continue;
    3165        2464 :     unsigned reg = MO.getReg();
    3166        2464 :     if (MO.isDef()) {
    3167             :       // Create a new virtual register for the definition.
    3168         866 :       const TargetRegisterClass *RC = MRI.getRegClass(reg);
    3169         866 :       unsigned NewReg = MRI.createVirtualRegister(RC);
    3170         866 :       MO.setReg(NewReg);
    3171        1732 :       VRMap[CurStageNum][reg] = NewReg;
    3172         866 :       if (LastDef)
    3173          96 :         replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
    3174             :     } else if (MO.isUse()) {
    3175        1598 :       MachineInstr *Def = MRI.getVRegDef(reg);
    3176             :       // Compute the stage that contains the last definition for instruction.
    3177        1598 :       int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
    3178             :       unsigned StageNum = CurStageNum;
    3179        1598 :       if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
    3180             :         // Compute the difference in stages between the defintion and the use.
    3181         175 :         unsigned StageDiff = (InstrStageNum - DefStageNum);
    3182             :         // Make an adjustment to get the last definition.
    3183         175 :         StageNum -= StageDiff;
    3184             :       }
    3185        1598 :       if (VRMap[StageNum].count(reg))
    3186         506 :         MO.setReg(VRMap[StageNum][reg]);
    3187             :     }
    3188             :   }
    3189        1289 : }
    3190             : 
    3191             : /// Return the instruction in the loop that defines the register.
    3192             : /// If the definition is a Phi, then follow the Phi operand to
    3193             : /// the instruction in the loop.
    3194          20 : MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
    3195             :   SmallPtrSet<MachineInstr *, 8> Visited;
    3196          20 :   MachineInstr *Def = MRI.getVRegDef(Reg);
    3197             :   while (Def->isPHI()) {
    3198          10 :     if (!Visited.insert(Def).second)
    3199             :       break;
    3200          20 :     for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
    3201          40 :       if (Def->getOperand(i + 1).getMBB() == BB) {
    3202          20 :         Def = MRI.getVRegDef(Def->getOperand(i).getReg());
    3203             :         break;
    3204             :       }
    3205             :   }
    3206          20 :   return Def;
    3207             : }
    3208             : 
    3209             : /// Return the new name for the value from the previous stage.
    3210         181 : unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
    3211             :                                           unsigned LoopVal, unsigned LoopStage,
    3212             :                                           ValueMapTy *VRMap,
    3213             :                                           MachineBasicBlock *BB) {
    3214             :   unsigned PrevVal = 0;
    3215         181 :   if (StageNum > PhiStage) {
    3216          23 :     MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
    3217          23 :     if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
    3218             :       // The name is defined in the previous stage.
    3219          18 :       PrevVal = VRMap[StageNum - 1][LoopVal];
    3220           5 :     else if (VRMap[StageNum].count(LoopVal))
    3221             :       // The previous name is defined in the current stage when the instruction
    3222             :       // order is swapped.
    3223           0 :       PrevVal = VRMap[StageNum][LoopVal];
    3224           5 :     else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
    3225             :       // The loop value hasn't yet been scheduled.
    3226           1 :       PrevVal = LoopVal;
    3227           4 :     else if (StageNum == PhiStage + 1)
    3228             :       // The loop value is another phi, which has not been scheduled.
    3229             :       PrevVal = getInitPhiReg(*LoopInst, BB);
    3230           0 :     else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
    3231             :       // The loop value is another phi, which has been scheduled.
    3232           0 :       PrevVal =
    3233           0 :           getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
    3234             :                         LoopStage, VRMap, BB);
    3235             :   }
    3236         181 :   return PrevVal;
    3237             : }
    3238             : 
    3239             : /// Rewrite the Phi values in the specified block to use the mappings
    3240             : /// from the initial operand. Once the Phi is scheduled, we switch
    3241             : /// to using the loop value instead of the Phi value, so those names
    3242             : /// do not need to be rewritten.
    3243          99 : void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
    3244             :                                          unsigned StageNum,
    3245             :                                          SMSchedule &Schedule,
    3246             :                                          ValueMapTy *VRMap,
    3247             :                                          InstrMapTy &InstrMap) {
    3248         477 :   for (auto &PHI : BB->phis()) {
    3249             :     unsigned InitVal = 0;
    3250             :     unsigned LoopVal = 0;
    3251         180 :     getPhiRegs(PHI, BB, InitVal, LoopVal);
    3252         180 :     unsigned PhiDef = PHI.getOperand(0).getReg();
    3253             : 
    3254             :     unsigned PhiStage =
    3255         360 :         (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
    3256             :     unsigned LoopStage =
    3257         360 :         (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
    3258             :     unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
    3259         180 :     if (NumPhis > StageNum)
    3260             :       NumPhis = StageNum;
    3261         542 :     for (unsigned np = 0; np <= NumPhis; ++np) {
    3262             :       unsigned NewVal =
    3263         181 :           getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
    3264         181 :       if (!NewVal)
    3265             :         NewVal = InitVal;
    3266         181 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
    3267             :                             PhiDef, NewVal);
    3268             :     }
    3269             :   }
    3270          99 : }
    3271             : 
    3272             : /// Rewrite a previously scheduled instruction to use the register value
    3273             : /// from the new instruction. Make sure the instruction occurs in the
    3274             : /// basic block, and we don't change the uses in the new instruction.
    3275         953 : void SwingSchedulerDAG::rewriteScheduledInstr(
    3276             :     MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
    3277             :     unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
    3278             :     unsigned NewReg, unsigned PrevReg) {
    3279        1906 :   bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
    3280         953 :   int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
    3281             :   // Rewrite uses that have been scheduled already to use the new
    3282             :   // Phi register.
    3283         953 :   for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
    3284             :                                          EI = MRI.use_end();
    3285        3369 :        UI != EI;) {
    3286             :     MachineOperand &UseOp = *UI;
    3287        2416 :     MachineInstr *UseMI = UseOp.getParent();
    3288             :     ++UI;
    3289        2416 :     if (UseMI->getParent() != BB)
    3290        2738 :       continue;
    3291             :     if (UseMI->isPHI()) {
    3292         534 :       if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
    3293         262 :         continue;
    3294          29 :       if (getLoopPhiReg(*UseMI, BB) != OldReg)
    3295          24 :         continue;
    3296             :     }
    3297         904 :     InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
    3298             :     assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
    3299         904 :     SUnit *OrigMISU = getSUnit(OrigInstr->second);
    3300         904 :     int StageSched = Schedule.stageScheduled(OrigMISU);
    3301             :     int CycleSched = Schedule.cycleScheduled(OrigMISU);
    3302             :     unsigned ReplaceReg = 0;
    3303             :     // This is the stage for the scheduled instruction.
    3304         904 :     if (StagePhi == StageSched && Phi->isPHI()) {
    3305             :       int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
    3306         574 :       if (PrevReg && InProlog)
    3307             :         ReplaceReg = PrevReg;
    3308         574 :       else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
    3309           0 :                (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
    3310             :         ReplaceReg = PrevReg;
    3311             :       else
    3312             :         ReplaceReg = NewReg;
    3313             :     }
    3314             :     // The scheduled instruction occurs before the scheduled Phi, and the
    3315             :     // Phi is not loop carried.
    3316        1204 :     if (!InProlog && StagePhi + 1 == StageSched &&
    3317         300 :         !Schedule.isLoopCarried(this, *Phi))
    3318             :       ReplaceReg = NewReg;
    3319         904 :     if (StagePhi > StageSched && Phi->isPHI())
    3320             :       ReplaceReg = NewReg;
    3321        1180 :     if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
    3322             :       ReplaceReg = NewReg;
    3323         904 :     if (ReplaceReg) {
    3324        1716 :       MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
    3325         858 :       UseOp.setReg(ReplaceReg);
    3326             :     }
    3327             :   }
    3328         953 : }
    3329             : 
    3330             : /// Check if we can change the instruction to use an offset value from the
    3331             : /// previous iteration. If so, return true and set the base and offset values
    3332             : /// so that we can rewrite the load, if necessary.
    3333             : ///   v1 = Phi(v0, v3)
    3334             : ///   v2 = load v1, 0
    3335             : ///   v3 = post_store v1, 4, x
    3336             : /// This function enables the load to be rewritten as v2 = load v3, 4.
    3337         880 : bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
    3338             :                                               unsigned &BasePos,
    3339             :                                               unsigned &OffsetPos,
    3340             :                                               unsigned &NewBase,
    3341             :                                               int64_t &Offset) {
    3342             :   // Get the load instruction.
    3343         880 :   if (TII->isPostIncrement(*MI))
    3344             :     return false;
    3345             :   unsigned BasePosLd, OffsetPosLd;
    3346         815 :   if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
    3347             :     return false;
    3348         322 :   unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
    3349             : 
    3350             :   // Look for the Phi instruction.
    3351         161 :   MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
    3352         161 :   MachineInstr *Phi = MRI.getVRegDef(BaseReg);
    3353         161 :   if (!Phi || !Phi->isPHI())
    3354             :     return false;
    3355             :   // Get the register defined in the loop block.
    3356          83 :   unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
    3357          83 :   if (!PrevReg)
    3358             :     return false;
    3359             : 
    3360             :   // Check for the post-increment load/store instruction.
    3361          83 :   MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
    3362          83 :   if (!PrevDef || PrevDef == MI)
    3363             :     return false;
    3364             : 
    3365          83 :   if (!TII->isPostIncrement(*PrevDef))
    3366             :     return false;
    3367             : 
    3368          11 :   unsigned BasePos1 = 0, OffsetPos1 = 0;
    3369          11 :   if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
    3370             :     return false;
    3371             : 
    3372             :   // Make sure offset values are both positive or both negative.
    3373          22 :   int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
    3374          22 :   int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
    3375          11 :   if ((LoadOffset >= 0) != (StoreOffset >= 0))
    3376             :     return false;
    3377             : 
    3378             :   // Set the return value once we determine that we return true.
    3379          11 :   BasePos = BasePosLd;
    3380          11 :   OffsetPos = OffsetPosLd;
    3381          11 :   NewBase = PrevReg;
    3382          11 :   Offset = StoreOffset;
    3383             :   return true;
    3384             : }
    3385             : 
    3386             : /// Apply changes to the instruction if needed. The changes are need
    3387             : /// to improve the scheduling and depend up on the final schedule.
    3388         704 : void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
    3389             :                                          SMSchedule &Schedule) {
    3390             :   SUnit *SU = getSUnit(MI);
    3391             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
    3392         704 :       InstrChanges.find(SU);
    3393         704 :   if (It != InstrChanges.end()) {
    3394          10 :     std::pair<unsigned, int64_t> RegAndOffset = It->second;
    3395             :     unsigned BasePos, OffsetPos;
    3396          10 :     if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
    3397           0 :       return;
    3398          20 :     unsigned BaseReg = MI->getOperand(BasePos).getReg();
    3399          10 :     MachineInstr *LoopDef = findDefInLoop(BaseReg);
    3400          10 :     int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
    3401             :     int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
    3402          10 :     int BaseStageNum = Schedule.stageScheduled(SU);
    3403             :     int BaseCycleNum = Schedule.cycleScheduled(SU);
    3404          10 :     if (BaseStageNum < DefStageNum) {
    3405           8 :       MachineInstr *NewMI = MF.CloneMachineInstr(MI);
    3406           8 :       int OffsetDiff = DefStageNum - BaseStageNum;
    3407           8 :       if (DefCycleNum < BaseCycleNum) {
    3408           0 :         NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
    3409           0 :         if (OffsetDiff > 0)
    3410           0 :           --OffsetDiff;
    3411             :       }
    3412             :       int64_t NewOffset =
    3413          16 :           MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
    3414           8 :       NewMI->getOperand(OffsetPos).setImm(NewOffset);
    3415             :       SU->setInstr(NewMI);
    3416          16 :       MISUnitMap[NewMI] = SU;
    3417           8 :       NewMIs.insert(NewMI);
    3418             :     }
    3419             :   }
    3420             : }
    3421             : 
    3422             : /// Return true for an order dependence that is loop carried potentially.
    3423             : /// An order dependence is loop carried if the destination defines a value
    3424             : /// that may be used by the source in a subsequent iteration.
    3425        1990 : bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
    3426             :                                            bool isSucc) {
    3427        1990 :   if (!isOrder(Source, Dep) || Dep.isArtificial())
    3428             :     return false;
    3429             : 
    3430         194 :   if (!SwpPruneLoopCarried)
    3431             :     return true;
    3432             : 
    3433         194 :   MachineInstr *SI = Source->getInstr();
    3434         194 :   MachineInstr *DI = Dep.getSUnit()->getInstr();
    3435         194 :   if (!isSucc)
    3436             :     std::swap(SI, DI);
    3437             :   assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
    3438             : 
    3439             :   // Assume ordered loads and stores may have a loop carried dependence.
    3440         576 :   if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
    3441         545 :       SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
    3442             :     return true;
    3443             : 
    3444             :   // Only chain dependences between a load and store can be loop carried.
    3445         146 :   if (!DI->mayStore() || !SI->mayLoad())
    3446             :     return false;
    3447             : 
    3448             :   unsigned DeltaS, DeltaD;
    3449         136 :   if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
    3450             :     return true;
    3451             : 
    3452             :   unsigned BaseRegS, BaseRegD;
    3453             :   int64_t OffsetS, OffsetD;
    3454         106 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    3455         212 :   if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
    3456         106 :       !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
    3457             :     return true;
    3458             : 
    3459         106 :   if (BaseRegS != BaseRegD)
    3460             :     return true;
    3461             : 
    3462          74 :   uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
    3463          74 :   uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
    3464             : 
    3465             :   // This is the main test, which checks the offset values and the loop
    3466             :   // increment value to determine if the accesses may be loop carried.
    3467          74 :   if (OffsetS >= OffsetD)
    3468          48 :     return OffsetS + AccessSizeS > DeltaS;
    3469             :   else
    3470          26 :     return OffsetD + AccessSizeD > DeltaD;
    3471             : 
    3472             :   return true;
    3473             : }
    3474             : 
    3475             : void SwingSchedulerDAG::postprocessDAG() {
    3476         342 :   for (auto &M : Mutations)
    3477         228 :     M->apply(this);
    3478             : }
    3479             : 
    3480             : /// Try to schedule the node at the specified StartCycle and continue
    3481             : /// until the node is schedule or the EndCycle is reached.  This function
    3482             : /// returns true if the node is scheduled.  This routine may search either
    3483             : /// forward or backward for a place to insert the instruction based upon
    3484             : /// the relative values of StartCycle and EndCycle.
    3485        1467 : bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
    3486             :   bool forward = true;
    3487        1467 :   if (StartCycle > EndCycle)
    3488             :     forward = false;
    3489             : 
    3490             :   // The terminating condition depends on the direction.
    3491        1196 :   int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
    3492        3233 :   for (int curCycle = StartCycle; curCycle != termCycle;
    3493             :        forward ? ++curCycle : --curCycle) {
    3494             : 
    3495             :     // Add the already scheduled instructions at the specified cycle to the DFA.
    3496             :     Resources->clearResources();
    3497        4692 :     for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
    3498        7044 :          checkCycle <= LastCycle; checkCycle += II) {
    3499        2352 :       std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
    3500             : 
    3501             :       for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
    3502             :                                          E = cycleInstrs.end();
    3503        6194 :            I != E; ++I) {
    3504        7684 :         if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
    3505         828 :           continue;
    3506             :         assert(Resources->canReserveResources(*(*I)->getInstr()) &&
    3507             :                "These instructions have already been scheduled.");
    3508        3014 :         Resources->reserveResources(*(*I)->getInstr());
    3509             :       }
    3510             :     }
    3511        6719 :     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
    3512        2039 :         Resources->canReserveResources(*SU->getInstr())) {
    3513             :       DEBUG({
    3514             :         dbgs() << "\tinsert at cycle " << curCycle << " ";
    3515             :         SU->getInstr()->dump();
    3516             :       });
    3517             : 
    3518        2904 :       ScheduledInstrs[curCycle].push_back(SU);
    3519        2904 :       InstrToCycle.insert(std::make_pair(SU, curCycle));
    3520        1452 :       if (curCycle > LastCycle)
    3521         416 :         LastCycle = curCycle;
    3522        1452 :       if (curCycle < FirstCycle)
    3523           8 :         FirstCycle = curCycle;
    3524        1452 :       return true;
    3525             :     }
    3526             :     DEBUG({
    3527             :       dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
    3528             :       SU->getInstr()->dump();
    3529             :     });
    3530             :   }
    3531          15 :   return false;
    3532             : }
    3533             : 
    3534             : // Return the cycle of the earliest scheduled instruction in the chain.
    3535          11 : int SMSchedule::earliestCycleInChain(const SDep &Dep) {
    3536             :   SmallPtrSet<SUnit *, 8> Visited;
    3537             :   SmallVector<SDep, 8> Worklist;
    3538          11 :   Worklist.push_back(Dep);
    3539          11 :   int EarlyCycle = INT_MAX;
    3540         248 :   while (!Worklist.empty()) {
    3541         237 :     const SDep &Cur = Worklist.pop_back_val();
    3542             :     SUnit *PrevSU = Cur.getSUnit();
    3543         237 :     if (Visited.count(PrevSU))
    3544          97 :       continue;
    3545             :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
    3546         140 :     if (it == InstrToCycle.end())
    3547          15 :       continue;
    3548         250 :     EarlyCycle = std::min(EarlyCycle, it->second);
    3549         831 :     for (const auto &PI : PrevSU->Preds)
    3550         353 :       if (SwingSchedulerDAG::isOrder(PrevSU, PI))
    3551         226 :         Worklist.push_back(PI);
    3552         125 :     Visited.insert(PrevSU);
    3553             :   }
    3554          22 :   return EarlyCycle;
    3555             : }
    3556             : 
    3557             : // Return the cycle of the latest scheduled instruction in the chain.
    3558          67 : int SMSchedule::latestCycleInChain(const SDep &Dep) {
    3559             :   SmallPtrSet<SUnit *, 8> Visited;
    3560             :   SmallVector<SDep, 8> Worklist;
    3561          67 :   Worklist.push_back(Dep);
    3562          67 :   int LateCycle = INT_MIN;
    3563         633 :   while (!Worklist.empty()) {
    3564         566 :     const SDep &Cur = Worklist.pop_back_val();
    3565             :     SUnit *SuccSU = Cur.getSUnit();
    3566         566 :     if (Visited.count(SuccSU))
    3567         136 :       continue;
    3568             :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
    3569         430 :     if (it == InstrToCycle.end())
    3570         158 :       continue;
    3571         544 :     LateCycle = std::max(LateCycle, it->second);
    3572        1900 :     for (const auto &SI : SuccSU->Succs)
    3573         814 :       if (SwingSchedulerDAG::isOrder(SuccSU, SI))
    3574         499 :         Worklist.push_back(SI);
    3575         272 :     Visited.insert(SuccSU);
    3576             :   }
    3577         134 :   return LateCycle;
    3578             : }
    3579             : 
    3580             : /// If an instruction has a use that spans multiple iterations, then
    3581             : /// return true. These instructions are characterized by having a back-ege
    3582             : /// to a Phi, which contains a reference to another Phi.
    3583       16505 : static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
    3584       64631 :   for (auto &P : SU->Preds)
    3585       26442 :     if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
    3586       26697 :       for (auto &S : P.getSUnit()->Succs)
    3587       12159 :         if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
    3588             :           return P.getSUnit();
    3589             :   return nullptr;
    3590             : }
    3591             : 
    3592             : /// Compute the scheduling start slot for the instruction.  The start slot
    3593             : /// depends on any predecessor or successor nodes scheduled already.
    3594        1499 : void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
    3595             :                               int *MinEnd, int *MaxStart, int II,
    3596             :                               SwingSchedulerDAG *DAG) {
    3597             :   // Iterate over each instruction that has been scheduled already.  The start
    3598             :   // slot computuation depends on whether the previously scheduled instruction
    3599             :   // is a predecessor or successor of the specified instruction.
    3600        7743 :   for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
    3601             : 
    3602             :     // Iterate over each instruction in the current cycle.
    3603       16984 :     for (SUnit *I : getInstructions(cycle)) {
    3604             :       // Because we're processing a DAG for the dependences, we recognize
    3605             :       // the back-edge in recurrences by anti dependences.
    3606       27245 :       for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
    3607       16505 :         const SDep &Dep = SU->Preds[i];
    3608       16505 :         if (Dep.getSUnit() == I) {
    3609        1169 :           if (!DAG->isBackedge(SU, Dep)) {
    3610        2280 :             int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
    3611        2280 :                              DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
    3612        1140 :             *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
    3613        1140 :             if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
    3614          11 :               int End = earliestCycleInChain(Dep) + (II - 1);
    3615          11 :               *MinEnd = std::min(*MinEnd, End);
    3616             :             }
    3617             :           } else {
    3618          58 :             int LateStart = cycle - DAG->getLatency(SU, Dep) +
    3619          58 :                             DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
    3620          29 :             *MinLateStart = std::min(*MinLateStart, LateStart);
    3621             :           }
    3622             :         }
    3623             :         // For instruction that requires multiple iterations, make sure that
    3624             :         // the dependent instruction is not scheduled past the definition.
    3625       16505 :         SUnit *BE = multipleIterations(I, DAG);
    3626       16505 :         if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
    3627             :             !SU->isPred(I))
    3628           0 :           *MinLateStart = std::min(*MinLateStart, cycle);
    3629             :       }
    3630       28522 :       for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
    3631       35564 :         if (SU->Succs[i].getSUnit() == I) {
    3632             :           const SDep &Dep = SU->Succs[i];
    3633         810 :           if (!DAG->isBackedge(SU, Dep)) {
    3634        1166 :             int LateStart = cycle - DAG->getLatency(SU, Dep) +
    3635        1166 :                             DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
    3636         583 :             *MinLateStart = std::min(*MinLateStart, LateStart);
    3637         583 :             if (DAG->isLoopCarriedOrder(SU, Dep)) {
    3638          67 :               int Start = latestCycleInChain(Dep) + 1 - II;
    3639          67 :               *MaxStart = std::max(*MaxStart, Start);
    3640             :             }
    3641             :           } else {
    3642         454 :             int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
    3643         454 :                              DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
    3644         227 :             *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
    3645             :           }
    3646             :         }
    3647             :     }
    3648             :   }
    3649        1499 : }
    3650             : 
    3651             : /// Order the instructions within a cycle so that the definitions occur
    3652             : /// before the uses. Returns true if the instruction is added to the start
    3653             : /// of the list, or false if added to the end.
    3654         798 : bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
    3655             :                                  std::deque<SUnit *> &Insts) {
    3656         798 :   MachineInstr *MI = SU->getInstr();
    3657             :   bool OrderBeforeUse = false;
    3658             :   bool OrderAfterDef = false;
    3659             :   bool OrderBeforeDef = false;
    3660             :   unsigned MoveDef = 0;
    3661             :   unsigned MoveUse = 0;
    3662         798 :   int StageInst1 = stageScheduled(SU);
    3663             : 
    3664             :   unsigned Pos = 0;
    3665        2116 :   for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
    3666             :        ++I, ++Pos) {
    3667             :     // Relative order of Phis does not matter.
    3668         161 :     if (MI->isPHI() && (*I)->getInstr()->isPHI())
    3669          77 :       continue;
    3670        4896 :     for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    3671        1866 :       MachineOperand &MO = MI->getOperand(i);
    3672        3760 :       if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3673         517 :         continue;
    3674             :       unsigned Reg = MO.getReg();
    3675             :       unsigned BasePos, OffsetPos;
    3676        1349 :       if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
    3677         732 :         if (MI->getOperand(BasePos).getReg() == Reg)
    3678         218 :           if (unsigned NewReg = SSD->getInstrBaseReg(SU))
    3679             :             Reg = NewReg;
    3680             :       bool Reads, Writes;
    3681             :       std::tie(Reads, Writes) =
    3682        2698 :           (*I)->getInstr()->readsWritesVirtualRegister(Reg);
    3683        1349 :       if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
    3684             :         OrderBeforeUse = true;
    3685             :         MoveUse = Pos;
    3686        1316 :       } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
    3687             :         // Add the instruction after the scheduled instruction.
    3688             :         OrderAfterDef = true;
    3689             :         MoveDef = Pos;
    3690        1221 :       } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
    3691          36 :         if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
    3692             :           OrderBeforeUse = true;
    3693             :           MoveUse = Pos;
    3694             :         } else {
    3695             :           OrderAfterDef = true;
    3696             :           MoveDef = Pos;
    3697             :         }
    3698        2034 :       } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
    3699             :         OrderBeforeUse = true;
    3700             :         MoveUse = Pos;
    3701           8 :         if (MoveUse != 0) {
    3702             :           OrderAfterDef = true;
    3703           7 :           MoveDef = Pos - 1;
    3704             :         }
    3705        2018 :       } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
    3706             :         // Add the instruction before the scheduled instruction.
    3707             :         OrderBeforeUse = true;
    3708             :         MoveUse = Pos;
    3709        1527 :       } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
    3710         372 :                  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
    3711             :         OrderBeforeDef = true;
    3712             :         MoveUse = Pos;
    3713             :       }
    3714             :     }
    3715             :     // Check for order dependences between instructions. Make sure the source
    3716             :     // is ordered before the destination.
    3717        2130 :     for (auto &S : SU->Succs)
    3718         483 :       if (S.getKind() == SDep::Order) {
    3719          72 :         if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
    3720             :           OrderBeforeUse = true;
    3721             :           MoveUse = Pos;
    3722             :         }
    3723         822 :       } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
    3724           0 :         if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
    3725             :           if (S.isAssignedRegDep()) {
    3726             :             OrderAfterDef = true;
    3727             :             MoveDef = Pos;
    3728             :           }
    3729             :         } else {
    3730             :           OrderBeforeUse = true;
    3731             :           MoveUse = Pos;
    3732             :         }
    3733             :       }
    3734        2292 :     for (auto &P : SU->Preds)
    3735         855 :       if (P.getKind() == SDep::Order) {
    3736          43 :         if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
    3737             :           OrderAfterDef = true;
    3738             :           MoveDef = Pos;
    3739             :         }
    3740        1624 :       } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
    3741           0 :         if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
    3742             :           if (P.isAssignedRegDep()) {
    3743             :             OrderBeforeUse = true;
    3744             :             MoveUse = Pos;
    3745             :           }
    3746             :         } else {
    3747             :           OrderAfterDef = true;
    3748             :           MoveDef = Pos;
    3749             :         }
    3750             :       }
    3751             :   }
    3752             : 
    3753             :   // A circular dependence.
    3754         798 :   if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    3755             :     OrderBeforeUse = false;
    3756             : 
    3757             :   // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
    3758             :   // to a loop-carried dependence.
    3759         798 :   if (OrderBeforeDef)
    3760          60 :     OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
    3761             : 
    3762             :   // The uncommon case when the instruction order needs to be updated because
    3763             :   // there is both a use and def.
    3764         798 :   if (OrderBeforeUse && OrderAfterDef) {
    3765          47 :     SUnit *UseSU = Insts.at(MoveUse);
    3766          47 :     SUnit *DefSU = Insts.at(MoveDef);
    3767          47 :     if (MoveUse > MoveDef) {
    3768         135 :       Insts.erase(Insts.begin() + MoveUse);
    3769         135 :       Insts.erase(Insts.begin() + MoveDef);
    3770             :     } else {
    3771           6 :       Insts.erase(Insts.begin() + MoveDef);
    3772           6 :       Insts.erase(Insts.begin() + MoveUse);
    3773             :     }
    3774          47 :     if (orderDependence(SSD, UseSU, Insts)) {
    3775           0 :       Insts.push_front(SU);
    3776           0 :       orderDependence(SSD, DefSU, Insts);
    3777           0 :       return true;
    3778             :     }
    3779          47 :     Insts.pop_back();
    3780          47 :     Insts.push_back(SU);
    3781          47 :     Insts.push_back(UseSU);
    3782          47 :     orderDependence(SSD, DefSU, Insts);
    3783          47 :     return false;
    3784             :   }
    3785             :   // Put the new instruction first if there is a use in the list. Otherwise,
    3786             :   // put it at the end of the list.
    3787         751 :   if (OrderBeforeUse)
    3788          97 :     Insts.push_front(SU);
    3789             :   else
    3790         654 :     Insts.push_back(SU);
    3791             :   return OrderBeforeUse;
    3792             : }
    3793             : 
    3794             : /// Return true if the scheduled Phi has a loop carried operand.
    3795        1117 : bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
    3796             :   if (!Phi.isPHI())
    3797             :     return false;
    3798             :   assert(Phi.isPHI() && "Expecing a Phi.");
    3799             :   SUnit *DefSU = SSD->getSUnit(&Phi);
    3800             :   unsigned DefCycle = cycleScheduled(DefSU);
    3801         853 :   int DefStage = stageScheduled(DefSU);
    3802             : 
    3803             :   unsigned InitVal = 0;
    3804             :   unsigned LoopVal = 0;
    3805         853 :   getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
    3806         853 :   SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
    3807         850 :   if (!UseSU)
    3808             :     return true;
    3809         850 :   if (UseSU->getInstr()->isPHI())
    3810             :     return true;
    3811             :   unsigned LoopCycle = cycleScheduled(UseSU);
    3812         836 :   int LoopStage = stageScheduled(UseSU);
    3813         836 :   return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
    3814             : }
    3815             : 
    3816             : /// Return true if the instruction is a definition that is loop carried
    3817             : /// and defines the use on the next iteration.
    3818             : ///        v1 = phi(v2, v3)
    3819             : ///  (Def) v3 = op v1
    3820             : ///  (MO)   = v1
    3821             : /// If MO appears before Def, then then v1 and v3 may get assigned to the same
    3822             : /// register.
    3823         372 : bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
    3824             :                                        MachineInstr *Def, MachineOperand &MO) {
    3825         372 :   if (!MO.isReg())
    3826             :     return false;
    3827             :   if (Def->isPHI())
    3828             :     return false;
    3829         372 :   MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
    3830         568 :   if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
    3831             :     return false;
    3832         196 :   if (!isLoopCarried(SSD, *Phi))
    3833             :     return false;
    3834         194 :   unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
    3835         655 :   for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
    3836         522 :     MachineOperand &DMO = Def->getOperand(i);
    3837        1255 :     if (!DMO.isReg() || !DMO.isDef())
    3838         320 :       continue;
    3839         202 :     if (DMO.getReg() == LoopReg)
    3840             :       return true;
    3841             :   }
    3842             :   return false;
    3843             : }
    3844             : 
    3845             : // Check if the generated schedule is valid. This function checks if
    3846             : // an instruction that uses a physical register is scheduled in a
    3847             : // different stage than the definition. The pipeliner does not handle
    3848             : // physical register values that may cross a basic block boundary.
    3849         110 : bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
    3850         924 :   for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
    3851         704 :     SUnit &SU = SSD->SUnits[i];
    3852         704 :     if (!SU.hasPhysRegDefs)
    3853         704 :       continue;
    3854           0 :     int StageDef = stageScheduled(&SU);
    3855             :     assert(StageDef != -1 && "Instruction should have been scheduled.");
    3856           0 :     for (auto &SI : SU.Succs)
    3857             :       if (SI.isAssignedRegDep())
    3858           0 :         if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
    3859           0 :           if (stageScheduled(SI.getSUnit()) != StageDef)
    3860             :             return false;
    3861             :   }
    3862             :   return true;
    3863             : }
    3864             : 
    3865             : /// Attempt to fix the degenerate cases when the instruction serialization
    3866             : /// causes the register lifetimes to overlap. For example,
    3867             : ///   p' = store_pi(p, b)
    3868             : ///      = load p, offset
    3869             : /// In this case p and p' overlap, which means that two registers are needed.
    3870             : /// Instead, this function changes the load to use p' and updates the offset.
    3871         201 : void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
    3872             :   unsigned OverlapReg = 0;
    3873             :   unsigned NewBaseReg = 0;
    3874         905 :   for (SUnit *SU : Instrs) {
    3875         704 :     MachineInstr *MI = SU->getInstr();
    3876        3021 :     for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    3877        2409 :       const MachineOperand &MO = MI->getOperand(i);
    3878             :       // Look for an instruction that uses p. The instruction occurs in the
    3879             :       // same cycle but occurs later in the serialized order.
    3880        4093 :       if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
    3881             :         // Check that the instruction appears in the InstrChanges structure,
    3882             :         // which contains instructions that can have the offset updated.
    3883             :         DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
    3884           1 :           InstrChanges.find(SU);
    3885           1 :         if (It != InstrChanges.end()) {
    3886             :           unsigned BasePos, OffsetPos;
    3887             :           // Update the base register and adjust the offset.
    3888           1 :           if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
    3889           1 :             MachineInstr *NewMI = MF.CloneMachineInstr(MI);
    3890           2 :             NewMI->getOperand(BasePos).setReg(NewBaseReg);
    3891             :             int64_t NewOffset =
    3892           2 :                 MI->getOperand(OffsetPos).getImm() - It->second.second;
    3893           1 :             NewMI->getOperand(OffsetPos).setImm(NewOffset);
    3894             :             SU->setInstr(NewMI);
    3895           2 :             MISUnitMap[NewMI] = SU;
    3896           1 :             NewMIs.insert(NewMI);
    3897             :           }
    3898             :         }
    3899             :         OverlapReg = 0;
    3900             :         NewBaseReg = 0;
    3901             :         break;
    3902             :       }
    3903             :       // Look for an instruction of the form p' = op(p), which uses and defines
    3904             :       // two virtual registers that get allocated to the same physical register.
    3905        2408 :       unsigned TiedUseIdx = 0;
    3906        2408 :       if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
    3907             :         // OverlapReg is p in the example above.
    3908         182 :         OverlapReg = MI->getOperand(TiedUseIdx).getReg();
    3909             :         // NewBaseReg is p' in the example above.
    3910          91 :         NewBaseReg = MI->getOperand(i).getReg();
    3911          91 :         break;
    3912             :       }
    3913             :     }
    3914             :   }
    3915         201 : }
    3916             : 
    3917             : /// After the schedule has been formed, call this function to combine
    3918             : /// the instructions from the different stages/cycles.  That is, this
    3919             : /// function creates a schedule that represents a single iteration.
    3920         110 : void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
    3921             :   // Move all instructions to the first stage from later stages.
    3922         622 :   for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    3923         760 :     for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
    3924             :          ++stage) {
    3925             :       std::deque<SUnit *> &cycleInstrs =
    3926         358 :           ScheduledInstrs[cycle + (stage * InitiationInterval)];
    3927             :       for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
    3928             :                                                  E = cycleInstrs.rend();
    3929         370 :            I != E; ++I)
    3930         191 :         ScheduledInstrs[cycle].push_front(*I);
    3931             :     }
    3932             :   }
    3933             :   // Iterate over the definitions in each instruction, and compute the
    3934             :   // stage difference for each use.  Keep the maximum value.
    3935         814 :   for (auto &I : InstrToCycle) {
    3936         704 :     int DefStage = stageScheduled(I.first);
    3937         704 :     MachineInstr *MI = I.first->getInstr();
    3938        3377 :     for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    3939        2673 :       MachineOperand &Op = MI->getOperand(i);
    3940        4553 :       if (!Op.isReg() || !Op.isDef())
    3941        2004 :         continue;
    3942             : 
    3943         669 :       unsigned Reg = Op.getReg();
    3944         669 :       unsigned MaxDiff = 0;
    3945             :       bool PhiIsSwapped = false;
    3946         669 :       for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
    3947             :                                              EI = MRI.use_end();
    3948        1570 :            UI != EI; ++UI) {
    3949             :         MachineOperand &UseOp = *UI;
    3950         901 :         MachineInstr *UseMI = UseOp.getParent();
    3951             :         SUnit *SUnitUse = SSD->getSUnit(UseMI);
    3952         901 :         int UseStage = stageScheduled(SUnitUse);
    3953         901 :         unsigned Diff = 0;
    3954         901 :         if (UseStage != -1 && UseStage >= DefStage)
    3955         856 :           Diff = UseStage - DefStage;
    3956             :         if (MI->isPHI()) {
    3957         358 :           if (isLoopCarried(SSD, *MI))
    3958         355 :             ++Diff;
    3959             :           else
    3960             :             PhiIsSwapped = true;
    3961             :         }
    3962         901 :         MaxDiff = std::max(Diff, MaxDiff);
    3963             :       }
    3964         669 :       RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
    3965             :     }
    3966             :   }
    3967             : 
    3968             :   // Erase all the elements in the later stages. Only one iteration should
    3969             :   // remain in the scheduled list, and it contains all the instructions.
    3970         370 :   for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
    3971         150 :     ScheduledInstrs.erase(cycle);
    3972             : 
    3973             :   // Change the registers in instruction as specified in the InstrChanges
    3974             :   // map. We need to use the new registers to create the correct order.
    3975         924 :   for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
    3976         704 :     SUnit *SU = &SSD->SUnits[i];
    3977         704 :     SSD->applyInstrChange(SU->getInstr(), *this);
    3978             :   }
    3979             : 
    3980             :   // Reorder the instructions in each cycle to fix and improve the
    3981             :   // generated code.
    3982         622 :   for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
    3983         201 :     std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
    3984             :     std::deque<SUnit *> newOrderZC;
    3985             :     // Put the zero-cost, pseudo instructions at the start of the cycle.
    3986         905 :     for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
    3987         704 :       SUnit *SU = cycleInstrs[i];
    3988        1408 :       if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
    3989         202 :         orderDependence(SSD, SU, newOrderZC);
    3990             :     }
    3991             :     std::deque<SUnit *> newOrderI;
    3992             :     // Then, add the regular instructions back.
    3993         905 :     for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
    3994         704 :       SUnit *SU = cycleInstrs[i];
    3995        1408 :       if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
    3996         502 :         orderDependence(SSD, SU, newOrderI);
    3997             :     }
    3998             :     // Replace the old order with the new order.
    3999             :     cycleInstrs.swap(newOrderZC);
    4000         201 :     cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
    4001         201 :     SSD->fixupRegisterOverlaps(cycleInstrs);
    4002             :   }
    4003             : 
    4004             :   DEBUG(dump(););
    4005      291689 : }
    4006             : 
    4007             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    4008             : /// Print the schedule information to the given output.
    4009             : void SMSchedule::print(raw_ostream &os) const {
    4010             :   // Iterate over each cycle.
    4011             :   for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    4012             :     // Iterate over each instruction in the cycle.
    4013             :     const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
    4014             :     for (SUnit *CI : cycleInstrs->second) {
    4015             :       os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
    4016             :       os << "(" << CI->NodeNum << ") ";
    4017             :       CI->getInstr()->print(os);
    4018             :       os << "\n";
    4019             :     }
    4020             :   }
    4021             : }
    4022             : 
    4023             : /// Utility function used for debugging to print the schedule.
    4024             : LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
    4025             : #endif

Generated by: LCOV version 1.13