LCOV - code coverage report
Current view: top level - lib/CodeGen - MachinePipeliner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1810 1905 95.0 %
Date: 2017-09-14 15:23:50 Functions: 94 95 98.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
      11             : //
      12             : // Software pipelining (SWP) is an instruction scheduling technique for loops
      13             : // that overlap loop iterations and explioits ILP via a compiler transformation.
      14             : //
      15             : // Swing Modulo Scheduling is an implementation of software pipelining
      16             : // that generates schedules that are near optimal in terms of initiation
      17             : // interval, register requirements, and stage count. See the papers:
      18             : //
      19             : // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
      20             : // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996
      21             : // Conference on Parallel Architectures and Compilation Techiniques.
      22             : //
      23             : // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
      24             : // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
      25             : // Transactions on Computers, Vol. 50, No. 3, 2001.
      26             : //
      27             : // "An Implementation of Swing Modulo Scheduling With Extensions for
      28             : // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
      29             : // Urbana-Chambpain, 2005.
      30             : //
      31             : //
      32             : // The SMS algorithm consists of three main steps after computing the minimal
      33             : // initiation interval (MII).
      34             : // 1) Analyze the dependence graph and compute information about each
      35             : //    instruction in the graph.
      36             : // 2) Order the nodes (instructions) by priority based upon the heuristics
      37             : //    described in the algorithm.
      38             : // 3) Attempt to schedule the nodes in the specified order using the MII.
      39             : //
      40             : // This SMS implementation is a target-independent back-end pass. When enabled,
      41             : // the pass runs just prior to the register allocation pass, while the machine
      42             : // IR is in SSA form. If software pipelining is successful, then the original
      43             : // loop is replaced by the optimized loop. The optimized loop contains one or
      44             : // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
      45             : // the instructions cannot be scheduled in a given MII, we increase the MII by
      46             : // one and try again.
      47             : //
      48             : // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
      49             : // represent loop carried dependences in the DAG as order edges to the Phi
      50             : // nodes. We also perform several passes over the DAG to eliminate unnecessary
      51             : // edges that inhibit the ability to pipeline. The implementation uses the
      52             : // DFAPacketizer class to compute the minimum initiation interval and the check
      53             : // where an instruction may be inserted in the pipelined schedule.
      54             : //
      55             : // In order for the SMS pass to work, several target specific hooks need to be
      56             : // implemented to get information about the loop structure and to rewrite
      57             : // instructions.
      58             : //
      59             : //===----------------------------------------------------------------------===//
      60             : 
      61             : #include "llvm/ADT/ArrayRef.h"
      62             : #include "llvm/ADT/BitVector.h"
      63             : #include "llvm/ADT/DenseMap.h"
      64             : #include "llvm/ADT/MapVector.h"
      65             : #include "llvm/ADT/PriorityQueue.h"
      66             : #include "llvm/ADT/SetVector.h"
      67             : #include "llvm/ADT/SmallPtrSet.h"
      68             : #include "llvm/ADT/SmallSet.h"
      69             : #include "llvm/ADT/SmallVector.h"
      70             : #include "llvm/ADT/Statistic.h"
      71             : #include "llvm/ADT/iterator_range.h"
      72             : #include "llvm/Analysis/AliasAnalysis.h"
      73             : #include "llvm/Analysis/MemoryLocation.h"
      74             : #include "llvm/Analysis/ValueTracking.h"
      75             : #include "llvm/CodeGen/DFAPacketizer.h"
      76             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      77             : #include "llvm/CodeGen/MachineBasicBlock.h"
      78             : #include "llvm/CodeGen/MachineDominators.h"
      79             : #include "llvm/CodeGen/MachineFunction.h"
      80             : #include "llvm/CodeGen/MachineFunctionPass.h"
      81             : #include "llvm/CodeGen/MachineInstr.h"
      82             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      83             : #include "llvm/CodeGen/MachineLoopInfo.h"
      84             : #include "llvm/CodeGen/MachineMemOperand.h"
      85             : #include "llvm/CodeGen/MachineOperand.h"
      86             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      87             : #include "llvm/CodeGen/RegisterClassInfo.h"
      88             : #include "llvm/CodeGen/RegisterPressure.h"
      89             : #include "llvm/CodeGen/ScheduleDAG.h"
      90             : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
      91             : #include "llvm/CodeGen/ScheduleDAGMutation.h"
      92             : #include "llvm/IR/Attributes.h"
      93             : #include "llvm/IR/DebugLoc.h"
      94             : #include "llvm/IR/Function.h"
      95             : #include "llvm/MC/LaneBitmask.h"
      96             : #include "llvm/MC/MCInstrDesc.h"
      97             : #include "llvm/MC/MCInstrItineraries.h"
      98             : #include "llvm/MC/MCRegisterInfo.h"
      99             : #include "llvm/Pass.h"
     100             : #include "llvm/Support/CommandLine.h"
     101             : #include "llvm/Support/Compiler.h"
     102             : #include "llvm/Support/Debug.h"
     103             : #include "llvm/Support/MathExtras.h"
     104             : #include "llvm/Support/raw_ostream.h"
     105             : #include "llvm/Target/TargetInstrInfo.h"
     106             : #include "llvm/Target/TargetOpcodes.h"
     107             : #include "llvm/Target/TargetRegisterInfo.h"
     108             : #include "llvm/Target/TargetSubtargetInfo.h"
     109             : #include <algorithm>
     110             : #include <cassert>
     111             : #include <climits>
     112             : #include <cstdint>
     113             : #include <deque>
     114             : #include <functional>
     115             : #include <iterator>
     116             : #include <map>
     117             : #include <memory>
     118             : #include <tuple>
     119             : #include <utility>
     120             : #include <vector>
     121             : 
     122             : using namespace llvm;
     123             : 
     124             : #define DEBUG_TYPE "pipeliner"
     125             : 
     126             : STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
     127             : STATISTIC(NumPipelined, "Number of loops software pipelined");
     128             : 
     129             : /// A command line option to turn software pipelining on or off.
     130      289224 : static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
     131             :                                cl::ZeroOrMore,
     132      289224 :                                cl::desc("Enable Software Pipelining"));
     133             : 
     134             : /// A command line option to enable SWP at -Os.
     135       72306 : static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
     136      216918 :                                       cl::desc("Enable SWP at Os."), cl::Hidden,
     137      289224 :                                       cl::init(false));
     138             : 
     139             : /// A command line argument to limit minimum initial interval for pipelining.
     140       72306 : static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
     141      216918 :                               cl::desc("Size limit for the the MII."),
     142      289224 :                               cl::Hidden, cl::init(27));
     143             : 
     144             : /// A command line argument to limit the number of stages in the pipeline.
     145             : static cl::opt<int>
     146       72306 :     SwpMaxStages("pipeliner-max-stages",
     147      216918 :                  cl::desc("Maximum stages allowed in the generated scheduled."),
     148      289224 :                  cl::Hidden, cl::init(3));
     149             : 
     150             : /// A command line option to disable the pruning of chain dependences due to
     151             : /// an unrelated Phi.
     152             : static cl::opt<bool>
     153       72306 :     SwpPruneDeps("pipeliner-prune-deps",
     154      216918 :                  cl::desc("Prune dependences between unrelated Phi nodes."),
     155      289224 :                  cl::Hidden, cl::init(true));
     156             : 
     157             : /// A command line option to disable the pruning of loop carried order
     158             : /// dependences.
     159             : static cl::opt<bool>
     160       72306 :     SwpPruneLoopCarried("pipeliner-prune-loop-carried",
     161      216918 :                         cl::desc("Prune loop carried order dependences."),
     162      289224 :                         cl::Hidden, cl::init(true));
     163             : 
     164             : #ifndef NDEBUG
     165             : static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
     166             : #endif
     167             : 
     168       72306 : static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
     169      216918 :                                      cl::ReallyHidden, cl::init(false),
     170      289224 :                                      cl::ZeroOrMore, cl::desc("Ignore RecMII"));
     171             : 
     172             : namespace {
     173             : 
     174             : class NodeSet;
     175             : class SMSchedule;
     176             : 
     177             : /// The main class in the implementation of the target independent
     178             : /// software pipeliner pass.
     179        1215 : class MachinePipeliner : public MachineFunctionPass {
     180             : public:
     181             :   MachineFunction *MF = nullptr;
     182             :   const MachineLoopInfo *MLI = nullptr;
     183             :   const MachineDominatorTree *MDT = nullptr;
     184             :   const InstrItineraryData *InstrItins;
     185             :   const TargetInstrInfo *TII = nullptr;
     186             :   RegisterClassInfo RegClassInfo;
     187             : 
     188             : #ifndef NDEBUG
     189             :   static int NumTries;
     190             : #endif
     191             : 
     192             :   /// Cache the target analysis information about the loop.
     193        1620 :   struct LoopInfo {
     194             :     MachineBasicBlock *TBB = nullptr;
     195             :     MachineBasicBlock *FBB = nullptr;
     196             :     SmallVector<MachineOperand, 4> BrCond;
     197             :     MachineInstr *LoopInductionVar = nullptr;
     198             :     MachineInstr *LoopCompare = nullptr;
     199             :   };
     200             :   LoopInfo LI;
     201             : 
     202             :   static char ID;
     203             : 
     204         810 :   MachinePipeliner() : MachineFunctionPass(ID) {
     205         405 :     initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
     206         405 :   }
     207             : 
     208             :   bool runOnMachineFunction(MachineFunction &MF) override;
     209             : 
     210         401 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     211         401 :     AU.addRequired<AAResultsWrapperPass>();
     212         401 :     AU.addPreserved<AAResultsWrapperPass>();
     213         401 :     AU.addRequired<MachineLoopInfo>();
     214         401 :     AU.addRequired<MachineDominatorTree>();
     215         401 :     AU.addRequired<LiveIntervals>();
     216         401 :     MachineFunctionPass::getAnalysisUsage(AU);
     217         401 :   }
     218             : 
     219             : private:
     220             :   bool canPipelineLoop(MachineLoop &L);
     221             :   bool scheduleLoop(MachineLoop &L);
     222             :   bool swingModuloScheduler(MachineLoop &L);
     223             : };
     224             : 
     225             : /// This class builds the dependence graph for the instructions in a loop,
     226             : /// and attempts to schedule the instructions using the SMS algorithm.
     227         452 : class SwingSchedulerDAG : public ScheduleDAGInstrs {
     228             :   MachinePipeliner &Pass;
     229             :   /// The minimum initiation interval between iterations for this schedule.
     230             :   unsigned MII = 0;
     231             :   /// Set to true if a valid pipelined schedule is found for the loop.
     232             :   bool Scheduled = false;
     233             :   MachineLoop &Loop;
     234             :   LiveIntervals &LIS;
     235             :   const RegisterClassInfo &RegClassInfo;
     236             : 
     237             :   /// A toplogical ordering of the SUnits, which is needed for changing
     238             :   /// dependences and iterating over the SUnits.
     239             :   ScheduleDAGTopologicalSort Topo;
     240             : 
     241             :   struct NodeInfo {
     242             :     int ASAP = 0;
     243             :     int ALAP = 0;
     244             : 
     245             :     NodeInfo() = default;
     246             :   };
     247             :   /// Computed properties for each node in the graph.
     248             :   std::vector<NodeInfo> ScheduleInfo;
     249             : 
     250             :   enum OrderKind { BottomUp = 0, TopDown = 1 };
     251             :   /// Computed node ordering for scheduling.
     252             :   SetVector<SUnit *> NodeOrder;
     253             : 
     254             :   using NodeSetType = SmallVector<NodeSet, 8>;
     255             :   using ValueMapTy = DenseMap<unsigned, unsigned>;
     256             :   using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
     257             :   using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
     258             : 
     259             :   /// Instructions to change when emitting the final schedule.
     260             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
     261             : 
     262             :   /// We may create a new instruction, so remember it because it
     263             :   /// must be deleted when the pass is finished.
     264             :   SmallPtrSet<MachineInstr *, 4> NewMIs;
     265             : 
     266             :   /// Ordered list of DAG postprocessing steps.
     267             :   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
     268             : 
     269             :   /// Helper class to implement Johnson's circuit finding algorithm.
     270         452 :   class Circuits {
     271             :     std::vector<SUnit> &SUnits;
     272             :     SetVector<SUnit *> Stack;
     273             :     BitVector Blocked;
     274             :     SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
     275             :     SmallVector<SmallVector<int, 4>, 16> AdjK;
     276             :     unsigned NumPaths;
     277             :     static unsigned MaxPaths;
     278             : 
     279             :   public:
     280         113 :     Circuits(std::vector<SUnit> &SUs)
     281        1243 :         : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
     282             : 
     283             :     /// Reset the data structures used in the circuit algorithm.
     284        1361 :     void reset() {
     285        2722 :       Stack.clear();
     286        2722 :       Blocked.reset();
     287        5444 :       B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
     288        1361 :       NumPaths = 0;
     289        1361 :     }
     290             : 
     291             :     void createAdjacencyStructure(SwingSchedulerDAG *DAG);
     292             :     bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
     293             :     void unblock(int U);
     294             :   };
     295             : 
     296             : public:
     297         113 :   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
     298             :                     const RegisterClassInfo &rci)
     299         226 :       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
     300         791 :         RegClassInfo(rci), Topo(SUnits, &ExitSU) {
     301         113 :     P.MF->getSubtarget().getSMSMutations(Mutations);
     302         113 :   }
     303             : 
     304             :   void schedule() override;
     305             :   void finishBlock() override;
     306             : 
     307             :   /// Return true if the loop kernel has been scheduled.
     308             :   bool hasNewSchedule() { return Scheduled; }
     309             : 
     310             :   /// Return the earliest time an instruction may be scheduled.
     311        5320 :   int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
     312             : 
     313             :   /// Return the latest time an instruction my be scheduled.
     314        5696 :   int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
     315             : 
     316             :   /// The mobility function, which the the number of slots in which
     317             :   /// an instruction may be scheduled.
     318        5451 :   int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
     319             : 
     320             :   /// The depth, in the dependence graph, for a node.
     321        1413 :   int getDepth(SUnit *Node) { return Node->getDepth(); }
     322             : 
     323             :   /// The height, in the dependence graph, for a node.
     324        4314 :   int getHeight(SUnit *Node) { return Node->getHeight(); }
     325             : 
     326             :   /// Return true if the dependence is a back-edge in the data dependence graph.
     327             :   /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
     328             :   /// using an anti dependence from a Phi to an instruction.
     329       33418 :   bool isBackedge(SUnit *Source, const SDep &Dep) {
     330       33418 :     if (Dep.getKind() != SDep::Anti)
     331             :       return false;
     332        5996 :     return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
     333             :   }
     334             : 
     335             :   /// Return true if the dependence is an order dependence between non-Phis.
     336       55724 :   static bool isOrder(SUnit *Source, const SDep &Dep) {
     337       55724 :     if (Dep.getKind() != SDep::Order)
     338             :       return false;
     339      106846 :     return (!Source->getInstr()->isPHI() &&
     340       53419 :             !Dep.getSUnit()->getInstr()->isPHI());
     341             :   }
     342             : 
     343             :   bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
     344             : 
     345             :   /// The latency of the dependence.
     346        3869 :   unsigned getLatency(SUnit *Source, const SDep &Dep) {
     347             :     // Anti dependences represent recurrences, so use the latency of the
     348             :     // instruction on the back-edge.
     349        3869 :     if (Dep.getKind() == SDep::Anti) {
     350         272 :       if (Source->getInstr()->isPHI())
     351         200 :         return Dep.getSUnit()->Latency;
     352          45 :       if (Dep.getSUnit()->getInstr()->isPHI())
     353          27 :         return Source->Latency;
     354           9 :       return Dep.getLatency();
     355             :     }
     356        3633 :     return Dep.getLatency();
     357             :   }
     358             : 
     359             :   /// The distance function, which indicates that operation V of iteration I
     360             :   /// depends on operations U of iteration I-distance.
     361             :   unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
     362             :     // Instructions that feed a Phi have a distance of 1. Computing larger
     363             :     // values for arrays requires data dependence information.
     364        4112 :     if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
     365             :       return 1;
     366             :     return 0;
     367             :   }
     368             : 
     369             :   /// Set the Minimum Initiation Interval for this schedule attempt.
     370             :   void setMII(unsigned mii) { MII = mii; }
     371             : 
     372             :   MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
     373             :                                  bool UpdateDAG = false);
     374             : 
     375             :   /// Return the new base register that was stored away for the changed
     376             :   /// instruction.
     377             :   unsigned getInstrBaseReg(SUnit *SU) {
     378             :     DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
     379         206 :         InstrChanges.find(SU);
     380         618 :     if (It != InstrChanges.end())
     381          16 :       return It->second.first;
     382             :     return 0;
     383             :   }
     384             : 
     385             :   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
     386             :     Mutations.push_back(std::move(Mutation));
     387             :   }
     388             : 
     389             : private:
     390             :   void addLoopCarriedDependences(AliasAnalysis *AA);
     391             :   void updatePhiDependences();
     392             :   void changeDependences();
     393             :   unsigned calculateResMII();
     394             :   unsigned calculateRecMII(NodeSetType &RecNodeSets);
     395             :   void findCircuits(NodeSetType &NodeSets);
     396             :   void fuseRecs(NodeSetType &NodeSets);
     397             :   void removeDuplicateNodes(NodeSetType &NodeSets);
     398             :   void computeNodeFunctions(NodeSetType &NodeSets);
     399             :   void registerPressureFilter(NodeSetType &NodeSets);
     400             :   void colocateNodeSets(NodeSetType &NodeSets);
     401             :   void checkNodeSets(NodeSetType &NodeSets);
     402             :   void groupRemainingNodes(NodeSetType &NodeSets);
     403             :   void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
     404             :                          SetVector<SUnit *> &NodesAdded);
     405             :   void computeNodeOrder(NodeSetType &NodeSets);
     406             :   bool schedulePipeline(SMSchedule &Schedule);
     407             :   void generatePipelinedLoop(SMSchedule &Schedule);
     408             :   void generateProlog(SMSchedule &Schedule, unsigned LastStage,
     409             :                       MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
     410             :                       MBBVectorTy &PrologBBs);
     411             :   void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
     412             :                       MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
     413             :                       MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
     414             :   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
     415             :                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
     416             :                             SMSchedule &Schedule, ValueMapTy *VRMap,
     417             :                             InstrMapTy &InstrMap, unsigned LastStageNum,
     418             :                             unsigned CurStageNum, bool IsLast);
     419             :   void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
     420             :                     MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
     421             :                     SMSchedule &Schedule, ValueMapTy *VRMap,
     422             :                     InstrMapTy &InstrMap, unsigned LastStageNum,
     423             :                     unsigned CurStageNum, bool IsLast);
     424             :   void removeDeadInstructions(MachineBasicBlock *KernelBB,
     425             :                               MBBVectorTy &EpilogBBs);
     426             :   void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
     427             :                       SMSchedule &Schedule);
     428             :   void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
     429             :                    MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
     430             :                    ValueMapTy *VRMap);
     431             :   bool computeDelta(MachineInstr &MI, unsigned &Delta);
     432             :   void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
     433             :                          unsigned Num);
     434             :   MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
     435             :                            unsigned InstStageNum);
     436             :   MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
     437             :                                     unsigned InstStageNum,
     438             :                                     SMSchedule &Schedule);
     439             :   void updateInstruction(MachineInstr *NewMI, bool LastDef,
     440             :                          unsigned CurStageNum, unsigned InstStageNum,
     441             :                          SMSchedule &Schedule, ValueMapTy *VRMap);
     442             :   MachineInstr *findDefInLoop(unsigned Reg);
     443             :   unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
     444             :                          unsigned LoopStage, ValueMapTy *VRMap,
     445             :                          MachineBasicBlock *BB);
     446             :   void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
     447             :                         SMSchedule &Schedule, ValueMapTy *VRMap,
     448             :                         InstrMapTy &InstrMap);
     449             :   void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
     450             :                              InstrMapTy &InstrMap, unsigned CurStageNum,
     451             :                              unsigned PhiNum, MachineInstr *Phi,
     452             :                              unsigned OldReg, unsigned NewReg,
     453             :                              unsigned PrevReg = 0);
     454             :   bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
     455             :                              unsigned &OffsetPos, unsigned &NewBase,
     456             :                              int64_t &NewOffset);
     457             :   void postprocessDAG();
     458             : };
     459             : 
     460             : /// A NodeSet contains a set of SUnit DAG nodes with additional information
     461             : /// that assigns a priority to the set.
     462        2315 : class NodeSet {
     463             :   SetVector<SUnit *> Nodes;
     464             :   bool HasRecurrence = false;
     465             :   unsigned RecMII = 0;
     466             :   int MaxMOV = 0;
     467             :   int MaxDepth = 0;
     468             :   unsigned Colocate = 0;
     469             :   SUnit *ExceedPressure = nullptr;
     470             : 
     471             : public:
     472             :   using iterator = SetVector<SUnit *>::const_iterator;
     473             : 
     474         224 :   NodeSet() = default;
     475         254 :   NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
     476             : 
     477         490 :   bool insert(SUnit *SU) { return Nodes.insert(SU); }
     478             : 
     479          31 :   void insert(iterator S, iterator E) { Nodes.insert(S, E); }
     480             : 
     481             :   template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
     482         222 :     return Nodes.remove_if(P);
     483             :   }
     484             : 
     485       10706 :   unsigned count(SUnit *SU) const { return Nodes.count(SU); }
     486             : 
     487             :   bool hasRecurrence() { return HasRecurrence; };
     488             : 
     489         928 :   unsigned size() const { return Nodes.size(); }
     490             : 
     491        2014 :   bool empty() const { return Nodes.empty(); }
     492             : 
     493        1372 :   SUnit *getNode(unsigned i) const { return Nodes[i]; };
     494             : 
     495         260 :   void setRecMII(unsigned mii) { RecMII = mii; };
     496             : 
     497          14 :   void setColocate(unsigned c) { Colocate = c; };
     498             : 
     499           1 :   void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
     500             : 
     501             :   bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
     502             : 
     503          38 :   int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
     504             : 
     505             :   int getRecMII() { return RecMII; }
     506             : 
     507             :   /// Summarize node functions for the entire node set.
     508         210 :   void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
     509        1355 :     for (SUnit *SU : *this) {
     510        1030 :       MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
     511        1545 :       MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
     512             :     }
     513         210 :   }
     514             : 
     515             :   void clear() {
     516         226 :     Nodes.clear();
     517         113 :     RecMII = 0;
     518         113 :     HasRecurrence = false;
     519         113 :     MaxMOV = 0;
     520         113 :     MaxDepth = 0;
     521         113 :     Colocate = 0;
     522         113 :     ExceedPressure = nullptr;
     523             :   }
     524             : 
     525         516 :   operator SetVector<SUnit *> &() { return Nodes; }
     526             : 
     527             :   /// Sort the node sets by importance. First, rank them by recurrence MII,
     528             :   /// then by mobility (least mobile done first), and finally by depth.
     529             :   /// Each node set may contain a colocate value which is used as the first
     530             :   /// tie breaker, if it's set.
     531             :   bool operator>(const NodeSet &RHS) const {
     532         208 :     if (RecMII == RHS.RecMII) {
     533         157 :       if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
     534           0 :         return Colocate < RHS.Colocate;
     535         157 :       if (MaxMOV == RHS.MaxMOV)
     536         106 :         return MaxDepth > RHS.MaxDepth;
     537          51 :       return MaxMOV < RHS.MaxMOV;
     538             :     }
     539          51 :     return RecMII > RHS.RecMII;
     540             :   }
     541             : 
     542             :   bool operator==(const NodeSet &RHS) const {
     543             :     return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
     544             :            MaxDepth == RHS.MaxDepth;
     545             :   }
     546             : 
     547             :   bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
     548             : 
     549        1334 :   iterator begin() { return Nodes.begin(); }
     550        1334 :   iterator end() { return Nodes.end(); }
     551             : 
     552             :   void print(raw_ostream &os) const {
     553             :     os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
     554             :        << " depth " << MaxDepth << " col " << Colocate << "\n";
     555             :     for (const auto &I : Nodes)
     556             :       os << "   SU(" << I->NodeNum << ") " << *(I->getInstr());
     557             :     os << "\n";
     558             :   }
     559             : 
     560             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     561             :   LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
     562             : #endif
     563             : };
     564             : 
     565             : /// This class repesents the scheduled code.  The main data structure is a
     566             : /// map from scheduled cycle to instructions.  During scheduling, the
     567             : /// data structure explicitly represents all stages/iterations.   When
     568             : /// the algorithm finshes, the schedule is collapsed into a single stage,
     569             : /// which represents instructions from different loop iterations.
     570             : ///
     571             : /// The SMS algorithm allows negative values for cycles, so the first cycle
     572             : /// in the schedule is the smallest cycle value.
     573         560 : class SMSchedule {
     574             : private:
     575             :   /// Map from execution cycle to instructions.
     576             :   DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
     577             : 
     578             :   /// Map from instruction to execution cycle.
     579             :   std::map<SUnit *, int> InstrToCycle;
     580             : 
     581             :   /// Map for each register and the max difference between its uses and def.
     582             :   /// The first element in the pair is the max difference in stages. The
     583             :   /// second is true if the register defines a Phi value and loop value is
     584             :   /// scheduled before the Phi.
     585             :   std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
     586             : 
     587             :   /// Keep track of the first cycle value in the schedule.  It starts
     588             :   /// as zero, but the algorithm allows negative values.
     589             :   int FirstCycle = 0;
     590             : 
     591             :   /// Keep track of the last cycle value in the schedule.
     592             :   int LastCycle = 0;
     593             : 
     594             :   /// The initiation interval (II) for the schedule.
     595             :   int InitiationInterval = 0;
     596             : 
     597             :   /// Target machine information.
     598             :   const TargetSubtargetInfo &ST;
     599             : 
     600             :   /// Virtual register information.
     601             :   MachineRegisterInfo &MRI;
     602             : 
     603             :   std::unique_ptr<DFAPacketizer> Resources;
     604             : 
     605             : public:
     606         112 :   SMSchedule(MachineFunction *mf)
     607         336 :       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
     608         784 :         Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
     609             : 
     610         162 :   void reset() {
     611         162 :     ScheduledInstrs.clear();
     612         324 :     InstrToCycle.clear();
     613         324 :     RegToStageDiff.clear();
     614         162 :     FirstCycle = 0;
     615         162 :     LastCycle = 0;
     616         162 :     InitiationInterval = 0;
     617         162 :   }
     618             : 
     619             :   /// Set the initiation interval for this schedule.
     620         158 :   void setInitiationInterval(int ii) { InitiationInterval = ii; }
     621             : 
     622             :   /// Return the first cycle in the completed schedule.  This
     623             :   /// can be a negative value.
     624             :   int getFirstCycle() const { return FirstCycle; }
     625             : 
     626             :   /// Return the last cycle in the finalized schedule.
     627         612 :   int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
     628             : 
     629             :   /// Return the cycle of the earliest scheduled instruction in the dependence
     630             :   /// chain.
     631             :   int earliestCycleInChain(const SDep &Dep);
     632             : 
     633             :   /// Return the cycle of the latest scheduled instruction in the dependence
     634             :   /// chain.
     635             :   int latestCycleInChain(const SDep &Dep);
     636             : 
     637             :   void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
     638             :                     int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
     639             :   bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
     640             : 
     641             :   /// Iterators for the cycle to instruction map.
     642             :   using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
     643             :   using const_sched_iterator =
     644             :       DenseMap<int, std::deque<SUnit *>>::const_iterator;
     645             : 
     646             :   /// Return true if the instruction is scheduled at the specified stage.
     647             :   bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
     648        1784 :     return (stageScheduled(SU) == (int)StageNum);
     649             :   }
     650             : 
     651             :   /// Return the stage for a scheduled instruction.  Return -1 if
     652             :   /// the instruction has not been scheduled.
     653       12928 :   int stageScheduled(SUnit *SU) const {
     654       25856 :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
     655       25856 :     if (it == InstrToCycle.end())
     656             :       return -1;
     657       12238 :     return (it->second - FirstCycle) / InitiationInterval;
     658             :   }
     659             : 
     660             :   /// Return the cycle for a scheduled instruction. This function normalizes
     661             :   /// the first cycle to be 0.
     662             :   unsigned cycleScheduled(SUnit *SU) const {
     663        6034 :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
     664             :     assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
     665        3017 :     return (it->second - FirstCycle) % InitiationInterval;
     666             :   }
     667             : 
     668             :   /// Return the maximum stage count needed for this schedule.
     669             :   unsigned getMaxStageCount() {
     670        3902 :     return (LastCycle - FirstCycle) / InitiationInterval;
     671             :   }
     672             : 
     673             :   /// Return the max. number of stages/iterations that can occur between a
     674             :   /// register definition and its uses.
     675        1221 :   unsigned getStagesForReg(int Reg, unsigned CurStage) {
     676        1221 :     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
     677        2442 :     if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
     678             :       return 1;
     679        1219 :     return Stages.first;
     680             :   }
     681             : 
     682             :   /// The number of stages for a Phi is a little different than other
     683             :   /// instructions. The minimum value computed in RegToStageDiff is 1
     684             :   /// because we assume the Phi is needed for at least 1 iteration.
     685             :   /// This is not the case if the loop value is scheduled prior to the
     686             :   /// Phi in the same stage.  This function returns the number of stages
     687             :   /// or iterations needed between the Phi definition and any uses.
     688             :   unsigned getStagesForPhi(int Reg) {
     689         176 :     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
     690         176 :     if (Stages.second)
     691             :       return Stages.first;
     692         174 :     return Stages.first - 1;
     693             :   }
     694             : 
     695             :   /// Return the instructions that are scheduled at the specified cycle.
     696             :   std::deque<SUnit *> &getInstructions(int cycle) {
     697       23304 :     return ScheduledInstrs[cycle];
     698             :   }
     699             : 
     700             :   bool isValidSchedule(SwingSchedulerDAG *SSD);
     701             :   void finalizeSchedule(SwingSchedulerDAG *SSD);
     702             :   bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
     703             :                        std::deque<SUnit *> &Insts);
     704             :   bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
     705             :   bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
     706             :                              MachineOperand &MO);
     707             :   void print(raw_ostream &os) const;
     708             :   void dump() const;
     709             : };
     710             : 
     711             : } // end anonymous namespace
     712             : 
     713             : unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
     714             : char MachinePipeliner::ID = 0;
     715             : #ifndef NDEBUG
     716             : int MachinePipeliner::NumTries = 0;
     717             : #endif
     718             : char &llvm::MachinePipelinerID = MachinePipeliner::ID;
     719             : 
     720       20212 : INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
     721             :                       "Modulo Software Pipelining", false, false)
     722       20212 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     723       20212 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     724       20212 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     725       20212 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     726      152414 : INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
     727             :                     "Modulo Software Pipelining", false, false)
     728             : 
     729             : /// The "main" function for implementing Swing Modulo Scheduling.
     730         860 : bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
     731         860 :   if (skipFunction(*mf.getFunction()))
     732             :     return false;
     733             : 
     734         860 :   if (!EnableSWP)
     735             :     return false;
     736             : 
     737         868 :   if (mf.getFunction()->getAttributes().hasAttribute(
     738         868 :           AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
     739          15 :       !EnableSWPOptSize.getPosition())
     740             :     return false;
     741             : 
     742         838 :   MF = &mf;
     743         838 :   MLI = &getAnalysis<MachineLoopInfo>();
     744         838 :   MDT = &getAnalysis<MachineDominatorTree>();
     745         838 :   TII = MF->getSubtarget().getInstrInfo();
     746         838 :   RegClassInfo.runOnMachineFunction(*MF);
     747             : 
     748        3534 :   for (auto &L : *MLI)
     749         182 :     scheduleLoop(*L);
     750             : 
     751             :   return false;
     752             : }
     753             : 
     754             : /// Attempt to perform the SMS algorithm on the specified loop. This function is
     755             : /// the main entry point for the algorithm.  The function identifies candidate
     756             : /// loops, calculates the minimum initiation interval, and attempts to schedule
     757             : /// the loop.
     758         196 : bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
     759         196 :   bool Changed = false;
     760         798 :   for (auto &InnerLoop : L)
     761          14 :     Changed |= scheduleLoop(*InnerLoop);
     762             : 
     763             : #ifndef NDEBUG
     764             :   // Stop trying after reaching the limit (if any).
     765             :   int Limit = SwpLoopLimit;
     766             :   if (Limit >= 0) {
     767             :     if (NumTries >= SwpLoopLimit)
     768             :       return Changed;
     769             :     NumTries++;
     770             :   }
     771             : #endif
     772             : 
     773         196 :   if (!canPipelineLoop(L))
     774             :     return Changed;
     775             : 
     776         113 :   ++NumTrytoPipeline;
     777             : 
     778         113 :   Changed = swingModuloScheduler(L);
     779             : 
     780         113 :   return Changed;
     781             : }
     782             : 
     783             : /// Return true if the loop can be software pipelined.  The algorithm is
     784             : /// restricted to loops with a single basic block.  Make sure that the
     785             : /// branch in the loop can be analyzed.
     786         196 : bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
     787         392 :   if (L.getNumBlocks() != 1)
     788             :     return false;
     789             : 
     790             :   // Check if the branch can't be understood because we can't do pipelining
     791             :   // if that's the case.
     792         163 :   LI.TBB = nullptr;
     793         163 :   LI.FBB = nullptr;
     794         326 :   LI.BrCond.clear();
     795         326 :   if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
     796             :     return false;
     797             : 
     798         163 :   LI.LoopInductionVar = nullptr;
     799         163 :   LI.LoopCompare = nullptr;
     800         163 :   if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
     801             :     return false;
     802             : 
     803         114 :   if (!L.getLoopPreheader())
     804             :     return false;
     805             : 
     806             :   // If any of the Phis contain subregs, then we can't pipeline
     807             :   // because we don't know how to maintain subreg information in the
     808             :   // VMap structure.
     809         228 :   MachineBasicBlock *MBB = L.getHeader();
     810         228 :   for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
     811         114 :                                    BBE = MBB->getFirstNonPHI();
     812         313 :        BBI != BBE; ++BBI)
     813         996 :     for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
     814        1197 :       if (BBI->getOperand(i).getSubReg() != 0)
     815           1 :         return false;
     816             : 
     817         113 :   return true;
     818             : }
     819             : 
     820             : /// The SMS algorithm consists of the following main steps:
     821             : /// 1. Computation and analysis of the dependence graph.
     822             : /// 2. Ordering of the nodes (instructions).
     823             : /// 3. Attempt to Schedule the loop.
     824         113 : bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
     825             :   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
     826             : 
     827         226 :   SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
     828             : 
     829         226 :   MachineBasicBlock *MBB = L.getHeader();
     830             :   // The kernel should not include any terminator instructions.  These
     831             :   // will be added back later.
     832         113 :   SMS.startBlock(MBB);
     833             : 
     834             :   // Compute the number of 'real' instructions in the basic block by
     835             :   // ignoring terminators.
     836         113 :   unsigned size = MBB->size();
     837         339 :   for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
     838         113 :                                    E = MBB->instr_end();
     839         339 :        I != E; ++I, --size)
     840             :     ;
     841             : 
     842         226 :   SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
     843         113 :   SMS.schedule();
     844         113 :   SMS.exitRegion();
     845             : 
     846         113 :   SMS.finishBlock();
     847         226 :   return SMS.hasNewSchedule();
     848             : }
     849             : 
     850             : /// We override the schedule function in ScheduleDAGInstrs to implement the
     851             : /// scheduling part of the Swing Modulo Scheduling algorithm.
     852         113 : void SwingSchedulerDAG::schedule() {
     853         226 :   AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
     854         113 :   buildSchedGraph(AA);
     855         113 :   addLoopCarriedDependences(AA);
     856         113 :   updatePhiDependences();
     857         113 :   Topo.InitDAGTopologicalSorting();
     858         113 :   postprocessDAG();
     859         113 :   changeDependences();
     860             :   DEBUG({
     861             :     for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
     862             :       SUnits[su].dumpAll(this);
     863             :   });
     864             : 
     865         195 :   NodeSetType NodeSets;
     866         113 :   findCircuits(NodeSets);
     867             : 
     868             :   // Calculate the MII.
     869         113 :   unsigned ResMII = calculateResMII();
     870         226 :   unsigned RecMII = calculateRecMII(NodeSets);
     871             : 
     872         113 :   fuseRecs(NodeSets);
     873             : 
     874             :   // This flag is used for testing and can cause correctness problems.
     875         113 :   if (SwpIgnoreRecMII)
     876           0 :     RecMII = 0;
     877             : 
     878         113 :   MII = std::max(ResMII, RecMII);
     879             :   DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
     880             :                << ")\n");
     881             : 
     882             :   // Can't schedule a loop without a valid MII.
     883         113 :   if (MII == 0)
     884          31 :     return;
     885             : 
     886             :   // Don't pipeline large loops.
     887         226 :   if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
     888             :     return;
     889             : 
     890         112 :   computeNodeFunctions(NodeSets);
     891             : 
     892         112 :   registerPressureFilter(NodeSets);
     893             : 
     894         112 :   colocateNodeSets(NodeSets);
     895             : 
     896         112 :   checkNodeSets(NodeSets);
     897             : 
     898             :   DEBUG({
     899             :     for (auto &I : NodeSets) {
     900             :       dbgs() << "  Rec NodeSet ";
     901             :       I.dump();
     902             :     }
     903             :   });
     904             : 
     905         336 :   std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
     906             : 
     907         112 :   groupRemainingNodes(NodeSets);
     908             : 
     909         112 :   removeDuplicateNodes(NodeSets);
     910             : 
     911             :   DEBUG({
     912             :     for (auto &I : NodeSets) {
     913             :       dbgs() << "  NodeSet ";
     914             :       I.dump();
     915             :     }
     916             :   });
     917             : 
     918         112 :   computeNodeOrder(NodeSets);
     919             : 
     920         194 :   SMSchedule Schedule(Pass.MF);
     921         112 :   Scheduled = schedulePipeline(Schedule);
     922             : 
     923         112 :   if (!Scheduled)
     924          30 :     return;
     925             : 
     926         164 :   unsigned numStages = Schedule.getMaxStageCount();
     927             :   // No need to generate pipeline if there are no overlapped iterations.
     928          82 :   if (numStages == 0)
     929             :     return;
     930             : 
     931             :   // Check that the maximum stage count is less than user-defined limit.
     932         164 :   if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
     933             :     return;
     934             : 
     935          82 :   generatePipelinedLoop(Schedule);
     936          82 :   ++NumPipelined;
     937             : }
     938             : 
     939             : /// Clean up after the software pipeliner runs.
     940         113 : void SwingSchedulerDAG::finishBlock() {
     941         120 :   for (MachineInstr *I : NewMIs)
     942           7 :     MF.DeleteMachineInstr(I);
     943         113 :   NewMIs.clear();
     944             : 
     945             :   // Call the superclass.
     946         113 :   ScheduleDAGInstrs::finishBlock();
     947         113 : }
     948             : 
     949             : /// Return the register values for  the operands of a Phi instruction.
     950             : /// This function assume the instruction is a Phi.
     951             : static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
     952             :                        unsigned &InitVal, unsigned &LoopVal) {
     953             :   assert(Phi.isPHI() && "Expecting a Phi.");
     954             : 
     955        1267 :   InitVal = 0;
     956         968 :   LoopVal = 0;
     957        3801 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     958        5068 :     if (Phi.getOperand(i + 1).getMBB() != Loop)
     959         926 :       InitVal = Phi.getOperand(i).getReg();
     960             :     else
     961        2534 :       LoopVal = Phi.getOperand(i).getReg();
     962             : 
     963             :   assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
     964             : }
     965             : 
     966             : /// Return the Phi register value that comes from the incoming block.
     967             : static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
     968         144 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     969         288 :     if (Phi.getOperand(i + 1).getMBB() != LoopBB)
     970         288 :       return Phi.getOperand(i).getReg();
     971             :   return 0;
     972             : }
     973             : 
     974             : /// Return the Phi register value that comes the the loop block.
     975             : static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
     976        3147 :   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
     977        6112 :     if (Phi.getOperand(i + 1).getMBB() == LoopBB)
     978        3012 :       return Phi.getOperand(i).getReg();
     979             :   return 0;
     980             : }
     981             : 
     982             : /// Return true if SUb can be reached from SUa following the chain edges.
     983          49 : static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
     984          98 :   SmallPtrSet<SUnit *, 8> Visited;
     985          98 :   SmallVector<SUnit *, 8> Worklist;
     986          49 :   Worklist.push_back(SUa);
     987         125 :   while (!Worklist.empty()) {
     988          99 :     const SUnit *SU = Worklist.pop_back_val();
     989         396 :     for (auto &SI : SU->Succs) {
     990         122 :       SUnit *SuccSU = SI.getSUnit();
     991         122 :       if (SI.getKind() == SDep::Order) {
     992          73 :         if (Visited.count(SuccSU))
     993           0 :           continue;
     994          73 :         if (SuccSU == SUb)
     995          23 :           return true;
     996          50 :         Worklist.push_back(SuccSU);
     997          50 :         Visited.insert(SuccSU);
     998             :       }
     999             :     }
    1000             :   }
    1001             :   return false;
    1002             : }
    1003             : 
    1004             : /// Return true if the instruction causes a chain between memory
    1005             : /// references before and after it.
    1006        1361 : static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
    1007        2523 :   return MI.isCall() || MI.hasUnmodeledSideEffects() ||
    1008        1170 :          (MI.hasOrderedMemoryRef() &&
    1009          16 :           (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
    1010             : }
    1011             : 
    1012             : /// Return the underlying objects for the memory references of an instruction.
    1013             : /// This function calls the code in ValueTracking, but first checks that the
    1014             : /// instruction has a memory operand.
    1015         766 : static void getUnderlyingObjects(MachineInstr *MI,
    1016             :                                  SmallVectorImpl<Value *> &Objs,
    1017             :                                  const DataLayout &DL) {
    1018         766 :   if (!MI->hasOneMemOperand())
    1019             :     return;
    1020         726 :   MachineMemOperand *MM = *MI->memoperands_begin();
    1021         200 :   if (!MM->getValue())
    1022             :     return;
    1023         200 :   GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
    1024             : }
    1025             : 
    1026             : /// Add a chain edge between a load and store if the store can be an
    1027             : /// alias of the load on a subsequent iteration, i.e., a loop carried
    1028             : /// dependence. This code is very similar to the code in ScheduleDAGInstrs
    1029             : /// but that code doesn't create loop carried dependences.
    1030         113 : void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
    1031         226 :   MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
    1032        1813 :   for (auto &SU : SUnits) {
    1033        1361 :     MachineInstr &MI = *SU.getInstr();
    1034        1361 :     if (isDependenceBarrier(MI, AA))
    1035             :       PendingLoads.clear();
    1036        1154 :     else if (MI.mayLoad()) {
    1037         838 :       SmallVector<Value *, 4> Objs;
    1038         419 :       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
    1039        1378 :       for (auto V : Objs) {
    1040         121 :         SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
    1041         121 :         SUs.push_back(&SU);
    1042             :       }
    1043         735 :     } else if (MI.mayStore()) {
    1044         694 :       SmallVector<Value *, 4> Objs;
    1045         347 :       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
    1046        1120 :       for (auto V : Objs) {
    1047             :         MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
    1048          79 :             PendingLoads.find(V);
    1049         158 :         if (I == PendingLoads.end())
    1050          56 :           continue;
    1051         118 :         for (auto Load : I->second) {
    1052          49 :           if (isSuccOrder(Load, &SU))
    1053          59 :             continue;
    1054          26 :           MachineInstr &LdMI = *Load->getInstr();
    1055             :           // First, perform the cheaper check that compares the base register.
    1056             :           // If they are the same and the load offset is less than the store
    1057             :           // offset, then mark the dependence as loop carried potentially.
    1058             :           unsigned BaseReg1, BaseReg2;
    1059             :           int64_t Offset1, Offset2;
    1060          52 :           if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
    1061          26 :               !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
    1062           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1063           0 :             continue;            
    1064             :           }
    1065          39 :           if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
    1066             :             assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
    1067             :                    "What happened to the chain edge?");
    1068          13 :             SU.addPred(SDep(Load, SDep::Barrier));
    1069          13 :             continue;
    1070             :           }
    1071             :           // Second, the more expensive check that uses alias analysis on the
    1072             :           // base registers. If they alias, and the load offset is less than
    1073             :           // the store offset, the mark the dependence as loop carried.
    1074          13 :           if (!AA) {
    1075           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1076           0 :             continue;
    1077             :           }
    1078          13 :           MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
    1079          13 :           MachineMemOperand *MMO2 = *MI.memoperands_begin();
    1080          26 :           if (!MMO1->getValue() || !MMO2->getValue()) {
    1081           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1082           0 :             continue;
    1083             :           }
    1084          39 :           if (MMO1->getValue() == MMO2->getValue() &&
    1085          13 :               MMO1->getOffset() <= MMO2->getOffset()) {
    1086           0 :             SU.addPred(SDep(Load, SDep::Barrier));
    1087           0 :             continue;
    1088             :           }
    1089             :           AliasResult AAResult = AA->alias(
    1090          39 :               MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
    1091          13 :                              MMO1->getAAInfo()),
    1092          39 :               MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
    1093          26 :                              MMO2->getAAInfo()));
    1094             : 
    1095          13 :           if (AAResult != NoAlias)
    1096          13 :             SU.addPred(SDep(Load, SDep::Barrier));
    1097             :         }
    1098             :       }
    1099             :     }
    1100             :   }
    1101         113 : }
    1102             : 
    1103             : /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
    1104             : /// processes dependences for PHIs. This function adds true dependences
    1105             : /// from a PHI to a use, and a loop carried dependence from the use to the
    1106             : /// PHI. The loop carried dependence is represented as an anti dependence
    1107             : /// edge. This function also removes chain dependences between unrelated
    1108             : /// PHIs.
    1109         113 : void SwingSchedulerDAG::updatePhiDependences() {
    1110         226 :   SmallVector<SDep, 4> RemoveDeps;
    1111         113 :   const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
    1112             : 
    1113             :   // Iterate over each DAG node.
    1114        1813 :   for (SUnit &I : SUnits) {
    1115        1361 :     RemoveDeps.clear();
    1116             :     // Set to true if the instruction has an operand defined by a Phi.
    1117        1361 :     unsigned HasPhiUse = 0;
    1118        1361 :     unsigned HasPhiDef = 0;
    1119        1361 :     MachineInstr *MI = I.getInstr();
    1120             :     // Iterate over each operand, and we process the definitions.
    1121        6054 :     for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
    1122        1361 :                                     MOE = MI->operands_end();
    1123        6054 :          MOI != MOE; ++MOI) {
    1124        4693 :       if (!MOI->isReg())
    1125        1883 :         continue;
    1126        2810 :       unsigned Reg = MOI->getReg();
    1127        2810 :       if (MOI->isDef()) {
    1128             :         // If the register is used by a Phi, then create an anti dependence.
    1129             :         for (MachineRegisterInfo::use_instr_iterator
    1130        1051 :                  UI = MRI.use_instr_begin(Reg),
    1131        1051 :                  UE = MRI.use_instr_end();
    1132        2405 :              UI != UE; ++UI) {
    1133        1354 :           MachineInstr *UseMI = &*UI;
    1134        2708 :           SUnit *SU = getSUnit(UseMI);
    1135        1322 :           if (SU != nullptr && UseMI->isPHI()) {
    1136         189 :             if (!MI->isPHI()) {
    1137         189 :               SDep Dep(SU, SDep::Anti, Reg);
    1138         189 :               I.addPred(Dep);
    1139             :             } else {
    1140           6 :               HasPhiDef = Reg;
    1141             :               // Add a chain edge to a dependent Phi that isn't an existing
    1142             :               // predecessor.
    1143           7 :               if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
    1144           0 :                 I.addPred(SDep(SU, SDep::Barrier));
    1145             :             }
    1146             :           }
    1147             :         }
    1148        1759 :       } else if (MOI->isUse()) {
    1149             :         // If the register is defined by a Phi, then create a true dependence.
    1150        1759 :         MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
    1151        1759 :         if (DefMI == nullptr)
    1152          12 :           continue;
    1153        3494 :         SUnit *SU = getSUnit(DefMI);
    1154        1329 :         if (SU != nullptr && DefMI->isPHI()) {
    1155         380 :           if (!MI->isPHI()) {
    1156         380 :             SDep Dep(SU, SDep::Data, Reg);
    1157         760 :             Dep.setLatency(0);
    1158         380 :             ST.adjustSchedDependency(SU, &I, Dep);
    1159         380 :             I.addPred(Dep);
    1160             :           } else {
    1161           6 :             HasPhiUse = Reg;
    1162             :             // Add a chain edge to a dependent Phi that isn't an existing
    1163             :             // predecessor.
    1164          11 :             if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
    1165           0 :               I.addPred(SDep(SU, SDep::Barrier));
    1166             :           }
    1167             :         }
    1168             :       }
    1169             :     }
    1170             :     // Remove order dependences from an unrelated Phi.
    1171        1361 :     if (!SwpPruneDeps)
    1172           0 :       continue;
    1173       93009 :     for (auto &PI : I.Preds) {
    1174       88926 :       MachineInstr *PMI = PI.getSUnit()->getInstr();
    1175        1408 :       if (PMI->isPHI() && PI.getKind() == SDep::Order) {
    1176         835 :         if (I.getInstr()->isPHI()) {
    1177          85 :           if (PMI->getOperand(0).getReg() == HasPhiUse)
    1178           5 :             continue;
    1179         160 :           if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
    1180           1 :             continue;
    1181             :         }
    1182         829 :         RemoveDeps.push_back(PI);
    1183             :       }
    1184             :     }
    1185        3551 :     for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
    1186        1658 :       I.removePred(RemoveDeps[i]);
    1187             :   }
    1188         113 : }
    1189             : 
    1190             : /// Iterate over each DAG node and see if we can change any dependences
    1191             : /// in order to reduce the recurrence MII.
    1192         113 : void SwingSchedulerDAG::changeDependences() {
    1193             :   // See if an instruction can use a value from the previous iteration.
    1194             :   // If so, we update the base and offset of the instruction and change
    1195             :   // the dependences.
    1196        1813 :   for (SUnit &I : SUnits) {
    1197        1361 :     unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
    1198        1361 :     int64_t NewOffset = 0;
    1199        1361 :     if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
    1200             :                                NewOffset))
    1201        2703 :       continue;
    1202             : 
    1203             :     // Get the MI and SUnit for the instruction that defines the original base.
    1204          20 :     unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
    1205          10 :     MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
    1206          10 :     if (!DefMI)
    1207           0 :       continue;
    1208          20 :     SUnit *DefSU = getSUnit(DefMI);
    1209          10 :     if (!DefSU)
    1210           0 :       continue;
    1211             :     // Get the MI and SUnit for the instruction that defins the new base.
    1212          10 :     MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
    1213          10 :     if (!LastMI)
    1214           0 :       continue;
    1215          20 :     SUnit *LastSU = getSUnit(LastMI);
    1216          10 :     if (!LastSU)
    1217           0 :       continue;
    1218             : 
    1219          10 :     if (Topo.IsReachable(&I, LastSU))
    1220           1 :       continue;
    1221             : 
    1222             :     // Remove the dependence. The value now depends on a prior iteration.
    1223          18 :     SmallVector<SDep, 4> Deps;
    1224          36 :     for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
    1225             :          ++P)
    1226           9 :       if (P->getSUnit() == DefSU)
    1227           9 :         Deps.push_back(*P);
    1228          27 :     for (int i = 0, e = Deps.size(); i != e; i++) {
    1229          27 :       Topo.RemovePred(&I, Deps[i].getSUnit());
    1230          18 :       I.removePred(Deps[i]);
    1231             :     }
    1232             :     // Remove the chain dependence between the instructions.
    1233           9 :     Deps.clear();
    1234          65 :     for (auto &P : LastSU->Preds)
    1235          47 :       if (P.getSUnit() == &I && P.getKind() == SDep::Order)
    1236           9 :         Deps.push_back(P);
    1237          27 :     for (int i = 0, e = Deps.size(); i != e; i++) {
    1238          27 :       Topo.RemovePred(LastSU, Deps[i].getSUnit());
    1239          18 :       LastSU->removePred(Deps[i]);
    1240             :     }
    1241             : 
    1242             :     // Add a dependence between the new instruction and the instruction
    1243             :     // that defines the new base.
    1244          18 :     SDep Dep(&I, SDep::Anti, NewBase);
    1245           9 :     LastSU->addPred(Dep);
    1246             : 
    1247             :     // Remember the base and offset information so that we can update the
    1248             :     // instruction during code generation.
    1249          36 :     InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
    1250             :   }
    1251         113 : }
    1252             : 
    1253             : namespace {
    1254             : 
    1255             : // FuncUnitSorter - Comparison operator used to sort instructions by
    1256             : // the number of functional unit choices.
    1257       37506 : struct FuncUnitSorter {
    1258             :   const InstrItineraryData *InstrItins;
    1259             :   DenseMap<unsigned, unsigned> Resources;
    1260             : 
    1261         226 :   FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
    1262             : 
    1263             :   // Compute the number of functional unit alternatives needed
    1264             :   // at each stage, and take the minimum value. We prioritize the
    1265             :   // instructions by the least number of choices first.
    1266             :   unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
    1267       12566 :     unsigned schedClass = Inst->getDesc().getSchedClass();
    1268       12566 :     unsigned min = UINT_MAX;
    1269       39698 :     for (const InstrStage *IS = InstrItins->beginStage(schedClass),
    1270       25132 :                           *IE = InstrItins->endStage(schedClass);
    1271       27132 :          IS != IE; ++IS) {
    1272       14566 :       unsigned funcUnits = IS->getUnits();
    1273       14566 :       unsigned numAlternatives = countPopulation(funcUnits);
    1274       14566 :       if (numAlternatives < min) {
    1275       13207 :         min = numAlternatives;
    1276       13207 :         F = funcUnits;
    1277             :       }
    1278             :     }
    1279             :     return min;
    1280             :   }
    1281             : 
    1282             :   // Compute the critical resources needed by the instruction. This
    1283             :   // function records the functional units needed by instructions that
    1284             :   // must use only one functional unit. We use this as a tie breaker
    1285             :   // for computing the resource MII. The instrutions that require
    1286             :   // the same, highly used, functional unit have high priority.
    1287        1165 :   void calcCriticalResources(MachineInstr &MI) {
    1288        1165 :     unsigned SchedClass = MI.getDesc().getSchedClass();
    1289        3711 :     for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
    1290        2330 :                           *IE = InstrItins->endStage(SchedClass);
    1291        2546 :          IS != IE; ++IS) {
    1292        1381 :       unsigned FuncUnits = IS->getUnits();
    1293        2762 :       if (countPopulation(FuncUnits) == 1)
    1294         394 :         Resources[FuncUnits]++;
    1295             :     }
    1296        1165 :   }
    1297             : 
    1298             :   /// Return true if IS1 has less priority than IS2.
    1299        6283 :   bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
    1300        6283 :     unsigned F1 = 0, F2 = 0;
    1301       12566 :     unsigned MFUs1 = minFuncUnits(IS1, F1);
    1302       12566 :     unsigned MFUs2 = minFuncUnits(IS2, F2);
    1303        6283 :     if (MFUs1 == 1 && MFUs2 == 1)
    1304         999 :       return Resources.lookup(F1) < Resources.lookup(F2);
    1305        5950 :     return MFUs1 > MFUs2;
    1306             :   }
    1307             : };
    1308             : 
    1309             : } // end anonymous namespace
    1310             : 
    1311             : /// Calculate the resource constrained minimum initiation interval for the
    1312             : /// specified loop. We use the DFA to model the resources needed for
    1313             : /// each instruction, and we ignore dependences. A different DFA is created
    1314             : /// for each cycle that is required. When adding a new instruction, we attempt
    1315             : /// to add it to each existing DFA, until a legal space is found. If the
    1316             : /// instruction cannot be reserved in an existing DFA, we create a new one.
    1317         113 : unsigned SwingSchedulerDAG::calculateResMII() {
    1318         226 :   SmallVector<DFAPacketizer *, 8> Resources;
    1319         226 :   MachineBasicBlock *MBB = Loop.getHeader();
    1320         113 :   Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
    1321             : 
    1322             :   // Sort the instructions by the number of available choices for scheduling,
    1323             :   // least to most. Use the number of critical resources as the tie breaker.
    1324             :   FuncUnitSorter FUS =
    1325         339 :       FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
    1326         113 :   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
    1327         113 :                                    E = MBB->getFirstTerminator();
    1328        1278 :        I != E; ++I)
    1329        1165 :     FUS.calcCriticalResources(*I);
    1330             :   PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
    1331         452 :       FuncUnitOrder(FUS);
    1332             : 
    1333         113 :   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
    1334         113 :                                    E = MBB->getFirstTerminator();
    1335        1278 :        I != E; ++I)
    1336        1165 :     FuncUnitOrder.push(&*I);
    1337             : 
    1338        1278 :   while (!FuncUnitOrder.empty()) {
    1339        1165 :     MachineInstr *MI = FuncUnitOrder.top();
    1340        1165 :     FuncUnitOrder.pop();
    1341        2330 :     if (TII->isZeroCost(MI->getOpcode()))
    1342          18 :       continue;
    1343             :     // Attempt to reserve the instruction in an existing DFA. At least one
    1344             :     // DFA is needed for each cycle.
    1345        2294 :     unsigned NumCycles = getSUnit(MI)->Latency;
    1346        1147 :     unsigned ReservedCycles = 0;
    1347        1147 :     SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
    1348        1147 :     SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
    1349        2294 :     for (unsigned C = 0; C < NumCycles; ++C)
    1350       72415 :       while (RI != RE) {
    1351       72062 :         if ((*RI++)->canReserveResources(*MI)) {
    1352         794 :           ++ReservedCycles;
    1353         794 :           break;
    1354             :         }
    1355             :       }
    1356             :     // Start reserving resources using existing DFAs.
    1357        2735 :     for (unsigned C = 0; C < ReservedCycles; ++C) {
    1358         794 :       --RI;
    1359         794 :       (*RI)->reserveResources(*MI);
    1360             :     }
    1361             :     // Add new DFAs, if needed, to reserve resources.
    1362        1853 :     for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
    1363             :       DFAPacketizer *NewResource =
    1364         353 :           TII->CreateTargetScheduleState(MF.getSubtarget());
    1365             :       assert(NewResource->canReserveResources(*MI) && "Reserve error.");
    1366         353 :       NewResource->reserveResources(*MI);
    1367         353 :       Resources.push_back(NewResource);
    1368             :     }
    1369             :   }
    1370         113 :   int Resmii = Resources.size();
    1371             :   // Delete the memory for each of the DFAs that were created earlier.
    1372         579 :   for (DFAPacketizer *RI : Resources) {
    1373         466 :     DFAPacketizer *D = RI;
    1374         932 :     delete D;
    1375             :   }
    1376         113 :   Resources.clear();
    1377         226 :   return Resmii;
    1378             : }
    1379             : 
    1380             : /// Calculate the recurrence-constrainted minimum initiation interval.
    1381             : /// Iterate over each circuit.  Compute the delay(c) and distance(c)
    1382             : /// for each circuit. The II needs to satisfy the inequality
    1383             : /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
    1384             : /// II that satistifies the inequality, and the RecMII is the maximum
    1385             : /// of those values.
    1386             : unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
    1387         113 :   unsigned RecMII = 0;
    1388             : 
    1389         593 :   for (NodeSet &Nodes : NodeSets) {
    1390         254 :     if (Nodes.empty())
    1391             :       continue;
    1392             : 
    1393         254 :     unsigned Delay = Nodes.size() - 1;
    1394         254 :     unsigned Distance = 1;
    1395             : 
    1396             :     // ii = ceil(delay / distance)
    1397         254 :     unsigned CurMII = (Delay + Distance - 1) / Distance;
    1398         508 :     Nodes.setRecMII(CurMII);
    1399         254 :     if (CurMII > RecMII)
    1400         135 :       RecMII = CurMII;
    1401             :   }
    1402             : 
    1403             :   return RecMII;
    1404             : }
    1405             : 
    1406             : /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
    1407             : /// but we do this to find the circuits, and then change them back.
    1408         226 : static void swapAntiDependences(std::vector<SUnit> &SUnits) {
    1409         452 :   SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
    1410        3174 :   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    1411        5444 :     SUnit *SU = &SUnits[i];
    1412      184342 :     for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
    1413      178898 :          IP != EP; ++IP) {
    1414      176176 :       if (IP->getKind() != SDep::Anti)
    1415      175780 :         continue;
    1416         792 :       DepsAdded.push_back(std::make_pair(SU, *IP));
    1417             :     }
    1418             :   }
    1419         622 :   for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
    1420         226 :                                                           E = DepsAdded.end();
    1421         622 :        I != E; ++I) {
    1422             :     // Remove this anti dependency and add one in the reverse direction.
    1423         396 :     SUnit *SU = I->first;
    1424         396 :     SDep &D = I->second;
    1425         396 :     SUnit *TargetSU = D.getSUnit();
    1426         396 :     unsigned Reg = D.getReg();
    1427         396 :     unsigned Lat = D.getLatency();
    1428         396 :     SU->removePred(D);
    1429         396 :     SDep Dep(SU, SDep::Anti, Reg);
    1430         792 :     Dep.setLatency(Lat);
    1431         396 :     TargetSU->addPred(Dep);
    1432             :   }
    1433         226 : }
    1434             : 
    1435             : /// Create the adjacency structure of the nodes in the graph.
    1436         113 : void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    1437             :     SwingSchedulerDAG *DAG) {
    1438         339 :   BitVector Added(SUnits.size());
    1439        1587 :   for (int i = 0, e = SUnits.size(); i != e; ++i) {
    1440        1361 :     Added.reset();
    1441             :     // Add any successor to the adjacency matrix and exclude duplicates.
    1442       93532 :     for (auto &SI : SUnits[i].Succs) {
    1443             :       // Do not process a boundary node and a back-edge is processed only
    1444             :       // if it goes to a Phi.
    1445      176185 :       if (SI.getSUnit()->isBoundaryNode() ||
    1446       88286 :           (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
    1447           9 :         continue;
    1448       88079 :       int N = SI.getSUnit()->NodeNum;
    1449      176158 :       if (!Added.test(N)) {
    1450      175598 :         AdjK[i].push_back(N);
    1451       87799 :         Added.set(N);
    1452             :       }
    1453             :     }
    1454             :     // A chain edge between a store and a load is treated as a back-edge in the
    1455             :     // adjacency matrix.
    1456       93532 :     for (auto &PI : SUnits[i].Preds) {
    1457      282701 :       if (!SUnits[i].getInstr()->mayStore() ||
    1458      105954 :           !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
    1459       53548 :         continue;
    1460       69080 :       if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
    1461       34539 :         int N = PI.getSUnit()->NodeNum;
    1462       69078 :         if (!Added.test(N)) {
    1463       69078 :           AdjK[i].push_back(N);
    1464       34539 :           Added.set(N);
    1465             :         }
    1466             :       }
    1467             :     }
    1468             :   }
    1469         113 : }
    1470             : 
    1471             : /// Identify an elementary circuit in the dependence graph starting at the
    1472             : /// specified node.
    1473      148403 : bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
    1474             :                                           bool HasBackedge) {
    1475      296806 :   SUnit *SV = &SUnits[V];
    1476      148403 :   bool F = false;
    1477      148403 :   Stack.insert(SV);
    1478      296806 :   Blocked.set(V);
    1479             : 
    1480    19112605 :   for (auto W : AdjK[V]) {
    1481    18588222 :     if (NumPaths > MaxPaths)
    1482             :       break;
    1483    18520882 :     if (W < S)
    1484     3069654 :       continue;
    1485    15451228 :     if (W == S) {
    1486        1889 :       if (!HasBackedge)
    1487        1270 :         NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
    1488        1889 :       F = true;
    1489        1889 :       ++NumPaths;
    1490             :       break;
    1491    30898678 :     } else if (!Blocked.test(W)) {
    1492      147042 :       if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
    1493       70639 :         F = true;
    1494             :     }
    1495             :   }
    1496             : 
    1497      146514 :   if (F)
    1498       71122 :     unblock(V);
    1499             :   else {
    1500    14294438 :     for (auto W : AdjK[V]) {
    1501    13985314 :       if (W < S)
    1502     3069612 :         continue;
    1503    21831404 :       if (B[W].count(SV) == 0)
    1504    21831352 :         B[W].insert(SV);
    1505             :     }
    1506             :   }
    1507      296806 :   Stack.pop_back();
    1508      148403 :   return F;
    1509             : }
    1510             : 
    1511             : /// Unblock a node in the circuit finding algorithm.
    1512       72094 : void SwingSchedulerDAG::Circuits::unblock(int U) {
    1513      144188 :   Blocked.reset(U);
    1514      144188 :   SmallPtrSet<SUnit *, 4> &BU = B[U];
    1515      301073 :   while (!BU.empty()) {
    1516       52295 :     SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
    1517             :     assert(SI != BU.end() && "Invalid B set.");
    1518      104590 :     SUnit *W = *SI;
    1519      104590 :     BU.erase(W);
    1520      104590 :     if (Blocked.test(W->NodeNum))
    1521         972 :       unblock(W->NodeNum);
    1522             :   }
    1523       72094 : }
    1524             : 
    1525             : /// Identify all the elementary circuits in the dependence graph using
    1526             : /// Johnson's circuit algorithm.
    1527         113 : void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
    1528             :   // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
    1529             :   // but we do this to find the circuits, and then change them back.
    1530         113 :   swapAntiDependences(SUnits);
    1531             : 
    1532         226 :   Circuits Cir(SUnits);
    1533             :   // Create the adjacency structure.
    1534         113 :   Cir.createAdjacencyStructure(this);
    1535        1587 :   for (int i = 0, e = SUnits.size(); i != e; ++i) {
    1536        1361 :     Cir.reset();
    1537        1361 :     Cir.circuit(i, i, NodeSets);
    1538             :   }
    1539             : 
    1540             :   // Change the dependences back so that we've created a DAG again.
    1541         113 :   swapAntiDependences(SUnits);
    1542         113 : }
    1543             : 
    1544             : /// Return true for DAG nodes that we ignore when computing the cost functions.
    1545             : /// We ignore the back-edge recurrence in order to avoid unbounded recurison
    1546             : /// in the calculation of the ASAP, ALAP, etc functions.
    1547             : static bool ignoreDependence(const SDep &D, bool isPred) {
    1548       11478 :   if (D.isArtificial())
    1549             :     return true;
    1550       11478 :   return D.getKind() == SDep::Anti && isPred;
    1551             : }
    1552             : 
    1553             : /// Compute several functions need to order the nodes for scheduling.
    1554             : ///  ASAP - Earliest time to schedule a node.
    1555             : ///  ALAP - Latest time to schedule a node.
    1556             : ///  MOV - Mobility function, difference between ALAP and ASAP.
    1557             : ///  D - Depth of each node.
    1558             : ///  H - Height of each node.
    1559         112 : void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
    1560         224 :   ScheduleInfo.resize(SUnits.size());
    1561             : 
    1562             :   DEBUG({
    1563             :     for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
    1564             :                                                     E = Topo.end();
    1565             :          I != E; ++I) {
    1566             :       SUnit *SU = &SUnits[*I];
    1567             :       SU->dump(this);
    1568             :     }
    1569             :   });
    1570             : 
    1571         112 :   int maxASAP = 0;
    1572             :   // Compute ASAP.
    1573         336 :   for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
    1574         336 :                                                   E = Topo.end();
    1575         938 :        I != E; ++I) {
    1576         826 :     int asap = 0;
    1577        1652 :     SUnit *SU = &SUnits[*I];
    1578        2878 :     for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
    1579        1652 :                                     EP = SU->Preds.end();
    1580        2052 :          IP != EP; ++IP) {
    1581        1226 :       if (ignoreDependence(*IP, true))
    1582         195 :         continue;
    1583        1031 :       SUnit *pred = IP->getSUnit();
    1584        4124 :       asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
    1585        3093 :                                   getDistance(pred, SU, *IP) * MII));
    1586             :     }
    1587         826 :     maxASAP = std::max(maxASAP, asap);
    1588        1652 :     ScheduleInfo[*I].ASAP = asap;
    1589             :   }
    1590             : 
    1591             :   // Compute ALAP and MOV.
    1592             :   for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
    1593             :                                                           E = Topo.rend();
    1594         938 :        I != E; ++I) {
    1595         826 :     int alap = maxASAP;
    1596        1652 :     SUnit *SU = &SUnits[*I];
    1597        2878 :     for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
    1598        1652 :                                     ES = SU->Succs.end();
    1599        2052 :          IS != ES; ++IS) {
    1600        1226 :       if (ignoreDependence(*IS, true))
    1601         195 :         continue;
    1602        1031 :       SUnit *succ = IS->getSUnit();
    1603        4124 :       alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
    1604        3093 :                                   getDistance(SU, succ, *IS) * MII));
    1605             :     }
    1606             : 
    1607        1652 :     ScheduleInfo[*I].ALAP = alap;
    1608             :   }
    1609             : 
    1610             :   // After computing the node functions, compute the summary for each node set.
    1611         546 :   for (NodeSet &I : NodeSets)
    1612         210 :     I.computeNodeSetInfo(this);
    1613             : 
    1614             :   DEBUG({
    1615             :     for (unsigned i = 0; i < SUnits.size(); i++) {
    1616             :       dbgs() << "\tNode " << i << ":\n";
    1617             :       dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n";
    1618             :       dbgs() << "\t   ALAP = " << getALAP(&SUnits[i]) << "\n";
    1619             :       dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";
    1620             :       dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";
    1621             :       dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n";
    1622             :     }
    1623             :   });
    1624         112 : }
    1625             : 
    1626             : /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
    1627             : /// as the predecessors of the elements of NodeOrder that are not also in
    1628             : /// NodeOrder.
    1629         460 : static bool pred_L(SetVector<SUnit *> &NodeOrder,
    1630             :                    SmallSetVector<SUnit *, 8> &Preds,
    1631             :                    const NodeSet *S = nullptr) {
    1632         920 :   Preds.clear();
    1633         920 :   for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
    1634        2560 :        I != E; ++I) {
    1635        9749 :     for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
    1636        5549 :          PI != PE; ++PI) {
    1637        5535 :       if (S && S->count(PI->getSUnit()) == 0)
    1638         638 :         continue;
    1639        2811 :       if (ignoreDependence(*PI, true))
    1640         435 :         continue;
    1641        4752 :       if (NodeOrder.count(PI->getSUnit()) == 0)
    1642         430 :         Preds.insert(PI->getSUnit());
    1643             :     }
    1644             :     // Back-edges are predecessors with an anti-dependence.
    1645        7543 :     for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
    1646        4200 :                                     ES = (*I)->Succs.end();
    1647        5443 :          IS != ES; ++IS) {
    1648        3343 :       if (IS->getKind() != SDep::Anti)
    1649        2867 :         continue;
    1650         722 :       if (S && S->count(IS->getSUnit()) == 0)
    1651         101 :         continue;
    1652         750 :       if (NodeOrder.count(IS->getSUnit()) == 0)
    1653           0 :         Preds.insert(IS->getSUnit());
    1654             :     }
    1655             :   }
    1656         920 :   return !Preds.empty();
    1657             : }
    1658             : 
    1659             : /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
    1660             : /// as the successors of the elements of NodeOrder that are not also in
    1661             : /// NodeOrder.
    1662        1239 : static bool succ_L(SetVector<SUnit *> &NodeOrder,
    1663             :                    SmallSetVector<SUnit *, 8> &Succs,
    1664             :                    const NodeSet *S = nullptr) {
    1665        2478 :   Succs.clear();
    1666        2478 :   for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
    1667        5115 :        I != E; ++I) {
    1668       18459 :     for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
    1669       10707 :          SI != SE; ++SI) {
    1670        9953 :       if (S && S->count(SI->getSUnit()) == 0)
    1671         917 :         continue;
    1672        5914 :       if (ignoreDependence(*SI, false))
    1673           0 :         continue;
    1674       11828 :       if (NodeOrder.count(SI->getSUnit()) == 0)
    1675        2476 :         Succs.insert(SI->getSUnit());
    1676             :     }
    1677       14674 :     for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
    1678        7752 :                                     PE = (*I)->Preds.end();
    1679       10798 :          PI != PE; ++PI) {
    1680        6922 :       if (PI->getKind() != SDep::Anti)
    1681        5670 :         continue;
    1682        1790 :       if (S && S->count(PI->getSUnit()) == 0)
    1683         109 :         continue;
    1684        2286 :       if (NodeOrder.count(PI->getSUnit()) == 0)
    1685         366 :         Succs.insert(PI->getSUnit());
    1686             :     }
    1687             :   }
    1688        2478 :   return !Succs.empty();
    1689             : }
    1690             : 
    1691             : /// Return true if there is a path from the specified node to any of the nodes
    1692             : /// in DestNodes. Keep track and return the nodes in any path.
    1693        1741 : static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
    1694             :                         SetVector<SUnit *> &DestNodes,
    1695             :                         SetVector<SUnit *> &Exclude,
    1696             :                         SmallPtrSet<SUnit *, 8> &Visited) {
    1697        1741 :   if (Cur->isBoundaryNode())
    1698             :     return false;
    1699        3482 :   if (Exclude.count(Cur) != 0)
    1700             :     return false;
    1701        3196 :   if (DestNodes.count(Cur) != 0)
    1702             :     return true;
    1703        1379 :   if (!Visited.insert(Cur).second)
    1704         688 :     return Path.count(Cur) != 0;
    1705        1035 :   bool FoundPath = false;
    1706        4386 :   for (auto &SI : Cur->Succs)
    1707        1281 :     FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
    1708        5230 :   for (auto &PI : Cur->Preds)
    1709        2125 :     if (PI.getKind() == SDep::Anti)
    1710          58 :       FoundPath |=
    1711          58 :           computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
    1712        1035 :   if (FoundPath)
    1713         149 :     Path.insert(Cur);
    1714             :   return FoundPath;
    1715             : }
    1716             : 
    1717             : /// Return true if Set1 is a subset of Set2.
    1718         235 : template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
    1719         934 :   for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
    1720         668 :     if (Set2.count(*I) == 0)
    1721             :       return false;
    1722             :   return true;
    1723             : }
    1724             : 
    1725             : /// Compute the live-out registers for the instructions in a node-set.
    1726             : /// The live-out registers are those that are defined in the node-set,
    1727             : /// but not used. Except for use operands of Phis.
    1728          25 : static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
    1729             :                             NodeSet &NS) {
    1730          25 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1731          25 :   MachineRegisterInfo &MRI = MF.getRegInfo();
    1732          50 :   SmallVector<RegisterMaskPair, 8> LiveOutRegs;
    1733          50 :   SmallSet<unsigned, 4> Uses;
    1734         245 :   for (SUnit *SU : NS) {
    1735         145 :     const MachineInstr *MI = SU->getInstr();
    1736         145 :     if (MI->isPHI())
    1737           5 :       continue;
    1738        1284 :     for (const MachineOperand &MO : MI->operands())
    1739         920 :       if (MO.isReg() && MO.isUse()) {
    1740         279 :         unsigned Reg = MO.getReg();
    1741         558 :         if (TargetRegisterInfo::isVirtualRegister(Reg))
    1742         270 :           Uses.insert(Reg);
    1743           9 :         else if (MRI.isAllocatable(Reg))
    1744           0 :           for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    1745           0 :             Uses.insert(*Units);
    1746             :       }
    1747             :   }
    1748         245 :   for (SUnit *SU : NS)
    1749         817 :     for (const MachineOperand &MO : SU->getInstr()->operands())
    1750        1104 :       if (MO.isReg() && MO.isDef() && !MO.isDead()) {
    1751         136 :         unsigned Reg = MO.getReg();
    1752         272 :         if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    1753         136 :           if (!Uses.count(Reg))
    1754          46 :             LiveOutRegs.push_back(RegisterMaskPair(Reg,
    1755             :                                                    LaneBitmask::getNone()));
    1756           0 :         } else if (MRI.isAllocatable(Reg)) {
    1757           0 :           for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    1758           0 :             if (!Uses.count(*Units))
    1759           0 :               LiveOutRegs.push_back(RegisterMaskPair(*Units,
    1760             :                                                      LaneBitmask::getNone()));
    1761             :         }
    1762             :       }
    1763          25 :   RPTracker.addLiveRegs(LiveOutRegs);
    1764          25 : }
    1765             : 
    1766             : /// A heuristic to filter nodes in recurrent node-sets if the register
    1767             : /// pressure of a set is too high.
    1768         112 : void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
    1769         546 :   for (auto &NS : NodeSets) {
    1770             :     // Skip small node-sets since they won't cause register pressure problems.
    1771         210 :     if (NS.size() <= 2)
    1772         185 :       continue;
    1773          50 :     IntervalPressure RecRegPressure;
    1774          50 :     RegPressureTracker RecRPTracker(RecRegPressure);
    1775          75 :     RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    1776          25 :     computeLiveOuts(MF, RecRPTracker, NS);
    1777          25 :     RecRPTracker.closeBottom();
    1778             : 
    1779         150 :     std::vector<SUnit *> SUnits(NS.begin(), NS.end());
    1780          50 :     std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
    1781             :       return A->NodeNum > B->NodeNum;
    1782             :     });
    1783             : 
    1784         242 :     for (auto &SU : SUnits) {
    1785             :       // Since we're computing the register pressure for a subset of the
    1786             :       // instructions in a block, we need to set the tracker for each
    1787             :       // instruction in the node-set. The tracker is set to the instruction
    1788             :       // just after the one we're interested in.
    1789         286 :       MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
    1790         143 :       RecRPTracker.setPos(std::next(CurInstI));
    1791             : 
    1792         143 :       RegPressureDelta RPDelta;
    1793         143 :       ArrayRef<PressureChange> CriticalPSets;
    1794         143 :       RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
    1795             :                                              CriticalPSets,
    1796             :                                              RecRegPressure.MaxSetPressure);
    1797         143 :       if (RPDelta.Excess.isValid()) {
    1798             :         DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
    1799             :                      << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
    1800             :                      << ":" << RPDelta.Excess.getUnitInc());
    1801           2 :         NS.setExceedPressure(SU);
    1802           1 :         break;
    1803             :       }
    1804         142 :       RecRPTracker.recede();
    1805             :     }
    1806             :   }
    1807         112 : }
    1808             : 
    1809             : /// A heuristic to colocate node sets that have the same set of
    1810             : /// successors.
    1811         112 : void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
    1812         112 :   unsigned Colocate = 0;
    1813         434 :   for (int i = 0, e = NodeSets.size(); i < e; ++i) {
    1814         420 :     NodeSet &N1 = NodeSets[i];
    1815         367 :     SmallSetVector<SUnit *, 8> S1;
    1816         420 :     if (N1.empty() || !succ_L(N1, S1))
    1817          53 :       continue;
    1818         291 :     for (int j = i + 1; j < e; ++j) {
    1819         282 :       NodeSet &N2 = NodeSets[j];
    1820         141 :       if (N1.compareRecMII(N2) != 0)
    1821          74 :         continue;
    1822         156 :       SmallSetVector<SUnit *, 8> S2;
    1823         221 :       if (N2.empty() || !succ_L(N2, S2))
    1824             :         continue;
    1825          87 :       if (isSubset(S1, S2) && S1.size() == S2.size()) {
    1826          14 :         N1.setColocate(++Colocate);
    1827          14 :         N2.setColocate(Colocate);
    1828           7 :         break;
    1829             :       }
    1830             :     }
    1831             :   }
    1832         112 : }
    1833             : 
    1834             : /// Check if the existing node-sets are profitable. If not, then ignore the
    1835             : /// recurrent node-sets, and attempt to schedule all nodes together. This is
    1836             : /// a heuristic. If the MII is large and there is a non-recurrent node with
    1837             : /// a large depth compared to the MII, then it's best to try and schedule
    1838             : /// all instruction together instead of starting with the recurrent node-sets.
    1839         112 : void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
    1840             :   // Look for loops with a large MII.
    1841         112 :   if (MII <= 20)
    1842             :     return;
    1843             :   // Check if the node-set contains only a simple add recurrence.
    1844           0 :   for (auto &NS : NodeSets)
    1845           0 :     if (NS.size() > 2)
    1846             :       return;
    1847             :   // If the depth of any instruction is significantly larger than the MII, then
    1848             :   // ignore the recurrent node-sets and treat all instructions equally.
    1849           0 :   for (auto &SU : SUnits)
    1850           0 :     if (SU.getDepth() > MII * 1.5) {
    1851           0 :       NodeSets.clear();
    1852             :       DEBUG(dbgs() << "Clear recurrence node-sets\n");
    1853           0 :       return;
    1854             :     }
    1855             : }
    1856             : 
    1857             : /// Add the nodes that do not belong to a recurrence set into groups
    1858             : /// based upon connected componenets.
    1859         112 : void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
    1860         224 :   SetVector<SUnit *> NodesAdded;
    1861         224 :   SmallPtrSet<SUnit *, 8> Visited;
    1862             :   // Add the nodes that are on a path between the previous node sets and
    1863             :   // the current node set.
    1864         546 :   for (NodeSet &I : NodeSets) {
    1865         420 :     SmallSetVector<SUnit *, 8> N;
    1866             :     // Add the nodes from the current node set to the previous node set.
    1867         210 :     if (succ_L(I, N)) {
    1868         314 :       SetVector<SUnit *> Path;
    1869         761 :       for (SUnit *NI : N) {
    1870         290 :         Visited.clear();
    1871         290 :         computePath(NI, Path, NodesAdded, I, Visited);
    1872             :       }
    1873         157 :       if (!Path.empty())
    1874          46 :         I.insert(Path.begin(), Path.end());
    1875             :     }
    1876             :     // Add the nodes from the previous node set to the current node set.
    1877         210 :     N.clear();
    1878         210 :     if (succ_L(NodesAdded, N)) {
    1879         112 :       SetVector<SUnit *> Path;
    1880         280 :       for (SUnit *NI : N) {
    1881         112 :         Visited.clear();
    1882         112 :         computePath(NI, Path, I, NodesAdded, Visited);
    1883             :       }
    1884          56 :       if (!Path.empty())
    1885          16 :         I.insert(Path.begin(), Path.end());
    1886             :     }
    1887         420 :     NodesAdded.insert(I.begin(), I.end());
    1888             :   }
    1889             : 
    1890             :   // Create a new node set with the connected nodes of any successor of a node
    1891             :   // in a recurrent set.
    1892         224 :   NodeSet NewSet;
    1893         224 :   SmallSetVector<SUnit *, 8> N;
    1894         112 :   if (succ_L(NodesAdded, N))
    1895         286 :     for (SUnit *I : N)
    1896         109 :       addConnectedNodes(I, NewSet, NodesAdded);
    1897         112 :   if (!NewSet.empty())
    1898          59 :     NodeSets.push_back(NewSet);
    1899             : 
    1900             :   // Create a new node set with the connected nodes of any predecessor of a node
    1901             :   // in a recurrent set.
    1902         112 :   NewSet.clear();
    1903         112 :   if (pred_L(NodesAdded, N))
    1904          16 :     for (SUnit *I : N)
    1905           4 :       addConnectedNodes(I, NewSet, NodesAdded);
    1906         112 :   if (!NewSet.empty())
    1907           4 :     NodeSets.push_back(NewSet);
    1908             : 
    1909             :   // Create new nodes sets with the connected nodes any any remaining node that
    1910             :   // has no predecessor.
    1911        2702 :   for (unsigned i = 0; i < SUnits.size(); ++i) {
    1912        1652 :     SUnit *SU = &SUnits[i];
    1913        1652 :     if (NodesAdded.count(SU) == 0) {
    1914           1 :       NewSet.clear();
    1915           1 :       addConnectedNodes(SU, NewSet, NodesAdded);
    1916           1 :       if (!NewSet.empty())
    1917           1 :         NodeSets.push_back(NewSet);
    1918             :     }
    1919             :   }
    1920         112 : }
    1921             : 
    1922             : /// Add the node to the set, and add all is its connected nodes to the set.
    1923         300 : void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
    1924             :                                           SetVector<SUnit *> &NodesAdded) {
    1925         600 :   NewSet.insert(SU);
    1926         300 :   NodesAdded.insert(SU);
    1927        1262 :   for (auto &SI : SU->Succs) {
    1928         362 :     SUnit *Successor = SI.getSUnit();
    1929         724 :     if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
    1930         102 :       addConnectedNodes(Successor, NewSet, NodesAdded);
    1931             :   }
    1932        1360 :   for (auto &PI : SU->Preds) {
    1933         460 :     SUnit *Predecessor = PI.getSUnit();
    1934         920 :     if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
    1935          84 :       addConnectedNodes(Predecessor, NewSet, NodesAdded);
    1936             :   }
    1937         300 : }
    1938             : 
    1939             : /// Return true if Set1 contains elements in Set2. The elements in common
    1940             : /// are returned in a different container.
    1941         143 : static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
    1942             :                         SmallSetVector<SUnit *, 8> &Result) {
    1943         286 :   Result.clear();
    1944         343 :   for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
    1945         114 :     SUnit *SU = Set1[i];
    1946         114 :     if (Set2.count(SU) != 0)
    1947          13 :       Result.insert(SU);
    1948             :   }
    1949         286 :   return !Result.empty();
    1950             : }
    1951             : 
    1952             : /// Merge the recurrence node sets that have the same initial node.
    1953         113 : void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
    1954         339 :   for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    1955             :        ++I) {
    1956         216 :     NodeSet &NI = *I;
    1957         216 :     for (NodeSetType::iterator J = I + 1; J != E;) {
    1958         343 :       NodeSet &NJ = *J;
    1959         686 :       if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
    1960          76 :         if (NJ.compareRecMII(NI) > 0)
    1961           6 :           NI.setRecMII(NJ.getRecMII());
    1962         304 :         for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
    1963             :              ++NII)
    1964         380 :           I->insert(*NII);
    1965          38 :         NodeSets.erase(J);
    1966          76 :         E = NodeSets.end();
    1967             :       } else {
    1968         305 :         ++J;
    1969             :       }
    1970             :     }
    1971             :   }
    1972         113 : }
    1973             : 
    1974             : /// Remove nodes that have been scheduled in previous NodeSets.
    1975         112 : void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
    1976         599 :   for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    1977             :        ++I)
    1978         263 :     for (NodeSetType::iterator J = I + 1; J != E;) {
    1979        2038 :       J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
    1980             : 
    1981         222 :       if (J->empty()) {
    1982          11 :         NodeSets.erase(J);
    1983          22 :         E = NodeSets.end();
    1984             :       } else {
    1985         211 :         ++J;
    1986             :       }
    1987             :     }
    1988         112 : }
    1989             : 
    1990             : /// Return true if Inst1 defines a value that is used in Inst2.
    1991             : static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
    1992        8457 :   for (auto &SI : Inst1->Succs)
    1993        2040 :     if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
    1994             :       return true;
    1995             :   return false;
    1996             : }
    1997             : 
    1998             : /// Compute an ordered list of the dependence graph nodes, which
    1999             : /// indicates the order that the nodes will be scheduled.  This is a
    2000             : /// two-level algorithm. First, a partial order is created, which
    2001             : /// consists of a list of sets ordered from highest to lowest priority.
    2002         112 : void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
    2003         224 :   SmallSetVector<SUnit *, 8> R;
    2004         224 :   NodeOrder.clear();
    2005             : 
    2006         599 :   for (auto &Nodes : NodeSets) {
    2007             :     DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
    2008             :     OrderKind Order;
    2009         526 :     SmallSetVector<SUnit *, 8> N;
    2010         263 :     if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
    2011          90 :       R.insert(N.begin(), N.end());
    2012          45 :       Order = BottomUp;
    2013             :       DEBUG(dbgs() << "  Bottom up (preds) ");
    2014         218 :     } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
    2015         150 :       R.insert(N.begin(), N.end());
    2016          75 :       Order = TopDown;
    2017             :       DEBUG(dbgs() << "  Top down (succs) ");
    2018         143 :     } else if (isIntersect(N, Nodes, R)) {
    2019             :       // If some of the successors are in the existing node-set, then use the
    2020             :       // top-down ordering.
    2021             :       Order = TopDown;
    2022             :       DEBUG(dbgs() << "  Top down (intersect) ");
    2023         268 :     } else if (NodeSets.size() == 1) {
    2024           5 :       for (auto &N : Nodes)
    2025           2 :         if (N->Succs.size() == 0)
    2026           1 :           R.insert(N);
    2027             :       Order = BottomUp;
    2028             :       DEBUG(dbgs() << "  Bottom up (all) ");
    2029             :     } else {
    2030             :       // Find the node with the highest ASAP.
    2031         133 :       SUnit *maxASAP = nullptr;
    2032         888 :       for (SUnit *SU : Nodes) {
    2033         802 :         if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
    2034         319 :           maxASAP = SU;
    2035             :       }
    2036         133 :       R.insert(maxASAP);
    2037         133 :       Order = BottomUp;
    2038             :       DEBUG(dbgs() << "  Bottom up (default) ");
    2039             :     }
    2040             : 
    2041         531 :     while (!R.empty()) {
    2042         268 :       if (Order == TopDown) {
    2043             :         // Choose the node with the maximum height.  If more than one, choose
    2044             :         // the node with the lowest MOV. If still more than one, check if there
    2045             :         // is a dependence between the instructions.
    2046         773 :         while (!R.empty()) {
    2047         344 :           SUnit *maxHeight = nullptr;
    2048        2549 :           for (SUnit *I : R) {
    2049        3863 :             if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
    2050         533 :               maxHeight = I;
    2051        3532 :             else if (getHeight(I) == getHeight(maxHeight) &&
    2052        2153 :                      getMOV(I) < getMOV(maxHeight) &&
    2053          18 :                      !hasDataDependence(maxHeight, I))
    2054           9 :               maxHeight = I;
    2055        1950 :             else if (hasDataDependence(I, maxHeight))
    2056          15 :               maxHeight = I;
    2057             :           }
    2058         344 :           NodeOrder.insert(maxHeight);
    2059             :           DEBUG(dbgs() << maxHeight->NodeNum << " ");
    2060         344 :           R.remove(maxHeight);
    2061        1421 :           for (const auto &I : maxHeight->Succs) {
    2062         778 :             if (Nodes.count(I.getSUnit()) == 0)
    2063          38 :               continue;
    2064        1053 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2065          50 :               continue;
    2066         301 :             if (ignoreDependence(I, false))
    2067           0 :               continue;
    2068         301 :             R.insert(I.getSUnit());
    2069             :           }
    2070             :           // Back-edges are predecessors with an anti-dependence.
    2071        1591 :           for (const auto &I : maxHeight->Preds) {
    2072         559 :             if (I.getKind() != SDep::Anti)
    2073         536 :               continue;
    2074          46 :             if (Nodes.count(I.getSUnit()) == 0)
    2075           1 :               continue;
    2076          66 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2077           0 :               continue;
    2078          22 :             R.insert(I.getSUnit());
    2079             :           }
    2080             :         }
    2081          85 :         Order = BottomUp;
    2082             :         DEBUG(dbgs() << "\n   Switching order to bottom up ");
    2083         170 :         SmallSetVector<SUnit *, 8> N;
    2084          85 :         if (pred_L(NodeOrder, N, &Nodes))
    2085           8 :           R.insert(N.begin(), N.end());
    2086             :       } else {
    2087             :         // Choose the node with the maximum depth.  If more than one, choose
    2088             :         // the node with the lowest MOV. If there is still more than one, check
    2089             :         // for a dependence between the instructions.
    2090        1147 :         while (!R.empty()) {
    2091         482 :           SUnit *maxDepth = nullptr;
    2092        2186 :           for (SUnit *I : R) {
    2093        1256 :             if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
    2094         549 :               maxDepth = I;
    2095         644 :             else if (getDepth(I) == getDepth(maxDepth) &&
    2096         357 :                      getMOV(I) < getMOV(maxDepth) &&
    2097          48 :                      !hasDataDependence(I, maxDepth))
    2098          22 :               maxDepth = I;
    2099         338 :             else if (hasDataDependence(maxDepth, I))
    2100          10 :               maxDepth = I;
    2101             :           }
    2102         482 :           NodeOrder.insert(maxDepth);
    2103             :           DEBUG(dbgs() << maxDepth->NodeNum << " ");
    2104         482 :           R.remove(maxDepth);
    2105         482 :           if (Nodes.isExceedSU(maxDepth)) {
    2106           0 :             Order = TopDown;
    2107           0 :             R.clear();
    2108           0 :             R.insert(Nodes.getNode(0));
    2109           0 :             break;
    2110             :           }
    2111        2113 :           for (const auto &I : maxDepth->Preds) {
    2112        1334 :             if (Nodes.count(I.getSUnit()) == 0)
    2113         148 :               continue;
    2114        1557 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2115          60 :               continue;
    2116         459 :             if (I.getKind() == SDep::Anti)
    2117         115 :               continue;
    2118         344 :             R.insert(I.getSUnit());
    2119             :           }
    2120             :           // Back-edges are predecessors with an anti-dependence.
    2121        2283 :           for (const auto &I : maxDepth->Succs) {
    2122         837 :             if (I.getKind() != SDep::Anti)
    2123         678 :               continue;
    2124         318 :             if (Nodes.count(I.getSUnit()) == 0)
    2125          15 :               continue;
    2126         432 :             if (NodeOrder.count(I.getSUnit()) != 0)
    2127         115 :               continue;
    2128          29 :             R.insert(I.getSUnit());
    2129             :           }
    2130             :         }
    2131         183 :         Order = TopDown;
    2132             :         DEBUG(dbgs() << "\n   Switching order to top down ");
    2133         366 :         SmallSetVector<SUnit *, 8> N;
    2134         183 :         if (succ_L(NodeOrder, N, &Nodes))
    2135           2 :           R.insert(N.begin(), N.end());
    2136             :       }
    2137             :     }
    2138             :     DEBUG(dbgs() << "\nDone with Nodeset\n");
    2139             :   }
    2140             : 
    2141             :   DEBUG({
    2142             :     dbgs() << "Node order: ";
    2143             :     for (SUnit *I : NodeOrder)
    2144             :       dbgs() << " " << I->NodeNum << " ";
    2145             :     dbgs() << "\n";
    2146             :   });
    2147         112 : }
    2148             : 
    2149             : /// Process the nodes in the computed order and create the pipelined schedule
    2150             : /// of the instructions, if possible. Return true if a schedule is found.
    2151         112 : bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
    2152         224 :   if (NodeOrder.empty())
    2153             :     return false;
    2154             : 
    2155         112 :   bool scheduleFound = false;
    2156             :   // Keep increasing II until a valid schedule is found.
    2157         270 :   for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
    2158         158 :     Schedule.reset();
    2159         316 :     Schedule.setInitiationInterval(II);
    2160             :     DEBUG(dbgs() << "Try to schedule with " << II << "\n");
    2161             : 
    2162         316 :     SetVector<SUnit *>::iterator NI = NodeOrder.begin();
    2163         316 :     SetVector<SUnit *>::iterator NE = NodeOrder.end();
    2164             :     do {
    2165        1350 :       SUnit *SU = *NI;
    2166             : 
    2167             :       // Compute the schedule time for the instruction, which is based
    2168             :       // upon the scheduled time for any predecessors/successors.
    2169        1350 :       int EarlyStart = INT_MIN;
    2170        1350 :       int LateStart = INT_MAX;
    2171             :       // These values are set when the size of the schedule window is limited
    2172             :       // due to chain dependences.
    2173        1350 :       int SchedEnd = INT_MAX;
    2174        1350 :       int SchedStart = INT_MIN;
    2175        1350 :       Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
    2176             :                             II, this);
    2177             :       DEBUG({
    2178             :         dbgs() << "Inst (" << SU->NodeNum << ") ";
    2179             :         SU->getInstr()->dump();
    2180             :         dbgs() << "\n";
    2181             :       });
    2182             :       DEBUG({
    2183             :         dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
    2184             :                << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
    2185             :       });
    2186             : 
    2187        2680 :       if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
    2188        1330 :           SchedStart > LateStart)
    2189             :         scheduleFound = false;
    2190        1325 :       else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
    2191        1196 :         SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
    2192         598 :         scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
    2193         727 :       } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
    2194         564 :         SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
    2195         282 :         scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
    2196         445 :       } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
    2197         262 :         SchedEnd =
    2198        1048 :             std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
    2199             :         // When scheduling a Phi it is better to start at the late cycle and go
    2200             :         // backwards. The default order may insert the Phi too far away from
    2201             :         // its first dependence.
    2202         335 :         if (SU->getInstr()->isPHI())
    2203         189 :           scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
    2204             :         else
    2205          73 :           scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
    2206             :       } else {
    2207         183 :         int FirstCycle = Schedule.getFirstCycle();
    2208         366 :         scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
    2209         366 :                                         FirstCycle + getASAP(SU) + II - 1, II);
    2210             :       }
    2211             :       // Even if we find a schedule, make sure the schedule doesn't exceed the
    2212             :       // allowable number of stages. We keep trying if this happens.
    2213        1325 :       if (scheduleFound)
    2214        2600 :         if (SwpMaxStages > -1 &&
    2215        3900 :             Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
    2216             :           scheduleFound = false;
    2217             : 
    2218             :       DEBUG({
    2219             :         if (!scheduleFound)
    2220             :           dbgs() << "\tCan't schedule\n";
    2221             :       });
    2222        2700 :     } while (++NI != NE && scheduleFound);
    2223             : 
    2224             :     // If a schedule is found, check if it is a valid schedule too.
    2225         158 :     if (scheduleFound)
    2226         108 :       scheduleFound = Schedule.isValidSchedule(this);
    2227             :   }
    2228             : 
    2229             :   DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
    2230             : 
    2231         112 :   if (scheduleFound)
    2232         108 :     Schedule.finalizeSchedule(this);
    2233             :   else
    2234           4 :     Schedule.reset();
    2235             : 
    2236         220 :   return scheduleFound && Schedule.getMaxStageCount() > 0;
    2237             : }
    2238             : 
    2239             : /// Given a schedule for the loop, generate a new version of the loop,
    2240             : /// and replace the old version.  This function generates a prolog
    2241             : /// that contains the initial iterations in the pipeline, and kernel
    2242             : /// loop, and the epilogue that contains the code for the final
    2243             : /// iterations.
    2244          82 : void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
    2245             :   // Create a new basic block for the kernel and add it to the CFG.
    2246          82 :   MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    2247             : 
    2248         164 :   unsigned MaxStageCount = Schedule.getMaxStageCount();
    2249             : 
    2250             :   // Remember the registers that are used in different stages. The index is
    2251             :   // the iteration, or stage, that the instruction is scheduled in.  This is
    2252             :   // a map between register names in the orignal block and the names created
    2253             :   // in each stage of the pipelined loop.
    2254         428 :   ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
    2255         164 :   InstrMapTy InstrMap;
    2256             : 
    2257         164 :   SmallVector<MachineBasicBlock *, 4> PrologBBs;
    2258             :   // Generate the prolog instructions that set up the pipeline.
    2259          82 :   generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
    2260         246 :   MF.insert(BB->getIterator(), KernelBB);
    2261             : 
    2262             :   // Rearrange the instructions to generate the new, pipelined loop,
    2263             :   // and update register names as needed.
    2264         228 :   for (int Cycle = Schedule.getFirstCycle(),
    2265         164 :            LastCycle = Schedule.getFinalCycle();
    2266         228 :        Cycle <= LastCycle; ++Cycle) {
    2267         146 :     std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
    2268             :     // This inner loop schedules each instruction in the cycle.
    2269        1001 :     for (SUnit *CI : CycleInstrs) {
    2270        1126 :       if (CI->getInstr()->isPHI())
    2271         135 :         continue;
    2272         856 :       unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
    2273         428 :       MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
    2274         428 :       updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
    2275         856 :       KernelBB->push_back(NewMI);
    2276         428 :       InstrMap[NewMI] = CI->getInstr();
    2277             :     }
    2278             :   }
    2279             : 
    2280             :   // Copy any terminator instructions to the new kernel, and update
    2281             :   // names as needed.
    2282          82 :   for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
    2283         164 :                                    E = BB->instr_end();
    2284         246 :        I != E; ++I) {
    2285         328 :     MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
    2286         164 :     updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
    2287         328 :     KernelBB->push_back(NewMI);
    2288         328 :     InstrMap[NewMI] = &*I;
    2289             :   }
    2290             : 
    2291          82 :   KernelBB->transferSuccessors(BB);
    2292          82 :   KernelBB->replaceSuccessor(BB, KernelBB);
    2293             : 
    2294         164 :   generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
    2295             :                        VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
    2296         164 :   generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
    2297             :                InstrMap, MaxStageCount, MaxStageCount, false);
    2298             : 
    2299             :   DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
    2300             : 
    2301         164 :   SmallVector<MachineBasicBlock *, 4> EpilogBBs;
    2302             :   // Generate the epilog instructions to complete the pipeline.
    2303          82 :   generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
    2304             :                  PrologBBs);
    2305             : 
    2306             :   // We need this step because the register allocation doesn't handle some
    2307             :   // situations well, so we insert copies to help out.
    2308          82 :   splitLifetimes(KernelBB, EpilogBBs, Schedule);
    2309             : 
    2310             :   // Remove dead instructions due to loop induction variables.
    2311          82 :   removeDeadInstructions(KernelBB, EpilogBBs);
    2312             : 
    2313             :   // Add branches between prolog and epilog blocks.
    2314          82 :   addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
    2315             : 
    2316             :   // Remove the original loop since it's no longer referenced.
    2317         164 :   BB->clear();
    2318          82 :   BB->eraseFromParent();
    2319             : 
    2320         164 :   delete[] VRMap;
    2321          82 : }
    2322             : 
    2323             : /// Generate the pipeline prolog code.
    2324          82 : void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
    2325             :                                        MachineBasicBlock *KernelBB,
    2326             :                                        ValueMapTy *VRMap,
    2327             :                                        MBBVectorTy &PrologBBs) {
    2328         164 :   MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
    2329             :   assert(PreheaderBB != nullptr &&
    2330             :          "Need to add code to handle loops w/o preheader");
    2331          82 :   MachineBasicBlock *PredBB = PreheaderBB;
    2332         164 :   InstrMapTy InstrMap;
    2333             : 
    2334             :   // Generate a basic block for each stage, not including the last stage,
    2335             :   // which will be generated in the kernel. Each basic block may contain
    2336             :   // instructions from multiple stages/iterations.
    2337         173 :   for (unsigned i = 0; i < LastStage; ++i) {
    2338             :     // Create and insert the prolog basic block prior to the original loop
    2339             :     // basic block.  The original loop is removed later.
    2340          91 :     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    2341          91 :     PrologBBs.push_back(NewBB);
    2342         273 :     MF.insert(BB->getIterator(), NewBB);
    2343          91 :     NewBB->transferSuccessors(PredBB);
    2344          91 :     PredBB->addSuccessor(NewBB);
    2345          91 :     PredBB = NewBB;
    2346             : 
    2347             :     // Generate instructions for each appropriate stage. Process instructions
    2348             :     // in original program order.
    2349         193 :     for (int StageNum = i; StageNum >= 0; --StageNum) {
    2350         306 :       for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
    2351         102 :                                        BBE = BB->getFirstTerminator();
    2352         991 :            BBI != BBE; ++BBI) {
    2353        3556 :         if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
    2354         948 :           if (BBI->isPHI())
    2355         119 :             continue;
    2356             :           MachineInstr *NewMI =
    2357         710 :               cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
    2358         355 :           updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
    2359             :                             VRMap);
    2360         710 :           NewBB->push_back(NewMI);
    2361         710 :           InstrMap[NewMI] = &*BBI;
    2362             :         }
    2363             :       }
    2364             :     }
    2365          91 :     rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
    2366             :     DEBUG({
    2367             :       dbgs() << "prolog:\n";
    2368             :       NewBB->dump();
    2369             :     });
    2370             :   }
    2371             : 
    2372          82 :   PredBB->replaceSuccessor(BB, KernelBB);
    2373             : 
    2374             :   // Check if we need to remove the branch from the preheader to the original
    2375             :   // loop, and replace it with a branch to the new loop.
    2376          82 :   unsigned numBranches = TII->removeBranch(*PreheaderBB);
    2377          82 :   if (numBranches) {
    2378          30 :     SmallVector<MachineOperand, 0> Cond;
    2379          75 :     TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
    2380             :   }
    2381          82 : }
    2382             : 
    2383             : /// Generate the pipeline epilog code. The epilog code finishes the iterations
    2384             : /// that were started in either the prolog or the kernel.  We create a basic
    2385             : /// block for each stage that needs to complete.
    2386          82 : void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
    2387             :                                        MachineBasicBlock *KernelBB,
    2388             :                                        ValueMapTy *VRMap,
    2389             :                                        MBBVectorTy &EpilogBBs,
    2390             :                                        MBBVectorTy &PrologBBs) {
    2391             :   // We need to change the branch from the kernel to the first epilog block, so
    2392             :   // this call to analyze branch uses the kernel rather than the original BB.
    2393          82 :   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    2394         164 :   SmallVector<MachineOperand, 4> Cond;
    2395          82 :   bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
    2396             :   assert(!checkBranch && "generateEpilog must be able to analyze the branch");
    2397          82 :   if (checkBranch)
    2398           0 :     return;
    2399             : 
    2400          82 :   MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
    2401          82 :   if (*LoopExitI == KernelBB)
    2402             :     ++LoopExitI;
    2403             :   assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
    2404          82 :   MachineBasicBlock *LoopExitBB = *LoopExitI;
    2405             : 
    2406          82 :   MachineBasicBlock *PredBB = KernelBB;
    2407          82 :   MachineBasicBlock *EpilogStart = LoopExitBB;
    2408         164 :   InstrMapTy InstrMap;
    2409             : 
    2410             :   // Generate a basic block for each stage, not including the last stage,
    2411             :   // which was generated for the kernel.  Each basic block may contain
    2412             :   // instructions from multiple stages/iterations.
    2413          82 :   int EpilogStage = LastStage + 1;
    2414         173 :   for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
    2415          91 :     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
    2416          91 :     EpilogBBs.push_back(NewBB);
    2417         273 :     MF.insert(BB->getIterator(), NewBB);
    2418             : 
    2419          91 :     PredBB->replaceSuccessor(LoopExitBB, NewBB);
    2420         182 :     NewBB->addSuccessor(LoopExitBB);
    2421             : 
    2422          91 :     if (EpilogStart == LoopExitBB)
    2423          82 :       EpilogStart = NewBB;
    2424             : 
    2425             :     // Add instructions to the epilog depending on the current block.
    2426             :     // Process instructions in original program order.
    2427         193 :     for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
    2428        2594 :       for (auto &BBI : *BB) {
    2429        1093 :         if (BBI.isPHI())
    2430         198 :           continue;
    2431         895 :         MachineInstr *In = &BBI;
    2432        2685 :         if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
    2433         199 :           MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
    2434         199 :           updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
    2435         398 :           NewBB->push_back(NewMI);
    2436         199 :           InstrMap[NewMI] = In;
    2437             :         }
    2438             :       }
    2439             :     }
    2440         182 :     generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
    2441             :                          VRMap, InstrMap, LastStage, EpilogStage, i == 1);
    2442         182 :     generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
    2443             :                  InstrMap, LastStage, EpilogStage, i == 1);
    2444          91 :     PredBB = NewBB;
    2445             : 
    2446             :     DEBUG({
    2447             :       dbgs() << "epilog:\n";
    2448             :       NewBB->dump();
    2449             :     });
    2450             :   }
    2451             : 
    2452             :   // Fix any Phi nodes in the loop exit block.
    2453         428 :   for (MachineInstr &MI : *LoopExitBB) {
    2454             :     if (!MI.isPHI())
    2455             :       break;
    2456          30 :     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
    2457          42 :       MachineOperand &MO = MI.getOperand(i);
    2458          21 :       if (MO.getMBB() == BB)
    2459           9 :         MO.setMBB(PredBB);
    2460             :     }
    2461             :   }
    2462             : 
    2463             :   // Create a branch to the new epilog from the kernel.
    2464             :   // Remove the original branch and add a new branch to the epilog.
    2465          82 :   TII->removeBranch(*KernelBB);
    2466         328 :   TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
    2467             :   // Add a branch to the loop exit.
    2468         164 :   if (EpilogBBs.size() > 0) {
    2469         164 :     MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
    2470         164 :     SmallVector<MachineOperand, 4> Cond1;
    2471         328 :     TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
    2472             :   }
    2473             : }
    2474             : 
    2475             : /// Replace all uses of FromReg that appear outside the specified
    2476             : /// basic block with ToReg.
    2477         368 : static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
    2478             :                                     MachineBasicBlock *MBB,
    2479             :                                     MachineRegisterInfo &MRI,
    2480             :                                     LiveIntervals &LIS) {
    2481         368 :   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
    2482         368 :                                          E = MRI.use_end();
    2483         922 :        I != E;) {
    2484         554 :     MachineOperand &O = *I;
    2485         554 :     ++I;
    2486         554 :     if (O.getParent()->getParent() != MBB)
    2487          29 :       O.setReg(ToReg);
    2488             :   }
    2489             :   if (!LIS.hasInterval(ToReg))
    2490         368 :     LIS.createEmptyInterval(ToReg);
    2491         368 : }
    2492             : 
    2493             : /// Return true if the register has a use that occurs outside the
    2494             : /// specified loop.
    2495         199 : static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
    2496             :                             MachineRegisterInfo &MRI) {
    2497         199 :   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
    2498         199 :                                          E = MRI.use_end();
    2499         421 :        I != E; ++I)
    2500         222 :     if (I->getParent()->getParent() != BB)
    2501             :       return true;
    2502             :   return false;
    2503             : }
    2504             : 
    2505             : /// Generate Phis for the specific block in the generated pipelined code.
    2506             : /// This function looks at the Phis from the original code to guide the
    2507             : /// creation of new Phis.
    2508         173 : void SwingSchedulerDAG::generateExistingPhis(
    2509             :     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    2510             :     MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    2511             :     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    2512             :     bool IsLast) {
    2513             :   // Compute the stage number for the initial value of the Phi, which
    2514             :   // comes from the prolog. The prolog to use depends on to which kernel/
    2515             :   // epilog that we're adding the Phi.
    2516         173 :   unsigned PrologStage = 0;
    2517         173 :   unsigned PrevStage = 0;
    2518         173 :   bool InKernel = (LastStageNum == CurStageNum);
    2519         173 :   if (InKernel) {
    2520          82 :     PrologStage = LastStageNum - 1;
    2521          82 :     PrevStage = CurStageNum;
    2522             :   } else {
    2523          91 :     PrologStage = LastStageNum - (CurStageNum - LastStageNum);
    2524          91 :     PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
    2525             :   }
    2526             : 
    2527         519 :   for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
    2528         173 :                                    BBE = BB->getFirstNonPHI();
    2529         472 :        BBI != BBE; ++BBI) {
    2530         299 :     unsigned Def = BBI->getOperand(0).getReg();
    2531             : 
    2532         299 :     unsigned InitVal = 0;
    2533         299 :     unsigned LoopVal = 0;
    2534         897 :     getPhiRegs(*BBI, BB, InitVal, LoopVal);
    2535             : 
    2536         299 :     unsigned PhiOp1 = 0;
    2537             :     // The Phi value from the loop body typically is defined in the loop, but
    2538             :     // not always. So, we need to check if the value is defined in the loop.
    2539         299 :     unsigned PhiOp2 = LoopVal;
    2540         598 :     if (VRMap[LastStageNum].count(LoopVal))
    2541         598 :       PhiOp2 = VRMap[LastStageNum][LoopVal];
    2542             : 
    2543         897 :     int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
    2544             :     int LoopValStage =
    2545         598 :         Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
    2546         299 :     unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
    2547         299 :     if (NumStages == 0) {
    2548             :       // We don't need to generate a Phi anymore, but we need to rename any uses
    2549             :       // of the Phi value.
    2550           4 :       unsigned NewReg = VRMap[PrevStage][LoopVal];
    2551           4 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
    2552             :                             Def, NewReg);
    2553           2 :       if (VRMap[CurStageNum].count(LoopVal))
    2554           6 :         VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
    2555             :     }
    2556             :     // Adjust the number of Phis needed depending on the number of prologs left,
    2557             :     // and the distance from where the Phi is first scheduled.
    2558         299 :     unsigned NumPhis = NumStages;
    2559         299 :     if (!InKernel && (int)PrologStage < LoopValStage)
    2560             :       // The NumPhis is the maximum number of new Phis needed during the steady
    2561             :       // state. If the Phi has not been scheduled in current prolog, then we
    2562             :       // need to generate less Phis.
    2563          94 :       NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
    2564             :     // The number of Phis cannot exceed the number of prolog stages. Each
    2565             :     // stage can potentially define two values.
    2566         598 :     NumPhis = std::min(NumPhis, PrologStage + 2);
    2567             : 
    2568         299 :     unsigned NewReg = 0;
    2569             : 
    2570         299 :     unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
    2571             :     // In the epilog, we may need to look back one stage to get the correct
    2572             :     // Phi name because the epilog and prolog blocks execute the same stage.
    2573             :     // The correct name is from the previous block only when the Phi has
    2574             :     // been completely scheduled prior to the epilog, and Phi value is not
    2575             :     // needed in multiple stages.
    2576         299 :     int StageDiff = 0;
    2577         299 :     if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
    2578             :         NumPhis == 1)
    2579         108 :       StageDiff = 1;
    2580             :     // Adjust the computations below when the phi and the loop definition
    2581             :     // are scheduled in different stages.
    2582         299 :     if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
    2583           0 :       StageDiff = StageScheduled - LoopValStage;
    2584         611 :     for (unsigned np = 0; np < NumPhis; ++np) {
    2585             :       // If the Phi hasn't been scheduled, then use the initial Phi operand
    2586             :       // value. Otherwise, use the scheduled version of the instruction. This
    2587             :       // is a little complicated when a Phi references another Phi.
    2588         312 :       if (np > PrologStage || StageScheduled >= (int)LastStageNum)
    2589          93 :         PhiOp1 = InitVal;
    2590             :       // Check if the Phi has already been scheduled in a prolog stage.
    2591         219 :       else if (PrologStage >= AccessStage + StageDiff + np &&
    2592         135 :                VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
    2593         238 :         PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
    2594             :       // Check if the Phi has already been scheduled, but the loop intruction
    2595             :       // is either another Phi, or doesn't occur in the loop.
    2596         100 :       else if (PrologStage >= AccessStage + StageDiff + np) {
    2597             :         // If the Phi references another Phi, we need to examine the other
    2598             :         // Phi to get the correct value.
    2599           8 :         PhiOp1 = LoopVal;
    2600           8 :         MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
    2601           8 :         int Indirects = 1;
    2602          24 :         while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
    2603          16 :           int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
    2604           8 :           if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
    2605           8 :             PhiOp1 = getInitPhiReg(*InstOp1, BB);
    2606             :           else
    2607           8 :             PhiOp1 = getLoopPhiReg(*InstOp1, BB);
    2608           8 :           InstOp1 = MRI.getVRegDef(PhiOp1);
    2609          16 :           int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
    2610           8 :           int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
    2611           8 :           if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
    2612           4 :               VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
    2613           8 :             PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
    2614           4 :             break;
    2615             :           }
    2616           4 :           ++Indirects;
    2617             :         }
    2618             :       } else
    2619          92 :         PhiOp1 = InitVal;
    2620             :       // If this references a generated Phi in the kernel, get the Phi operand
    2621             :       // from the incoming block.
    2622         312 :       if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
    2623          12 :         if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
    2624           2 :           PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
    2625             : 
    2626         312 :       MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
    2627         624 :       bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
    2628             :       // In the epilog, a map lookup is needed to get the value from the kernel,
    2629             :       // or previous epilog block. How is does this depends on if the
    2630             :       // instruction is scheduled in the previous block.
    2631         312 :       if (!InKernel) {
    2632         172 :         int StageDiffAdj = 0;
    2633         172 :         if (LoopValStage != -1 && StageScheduled > LoopValStage)
    2634           0 :           StageDiffAdj = StageScheduled - LoopValStage;
    2635             :         // Use the loop value defined in the kernel, unless the kernel
    2636             :         // contains the last definition of the Phi.
    2637         307 :         if (np == 0 && PrevStage == LastStageNum &&
    2638         172 :             (StageScheduled != 0 || LoopValStage != 0) &&
    2639          39 :             VRMap[PrevStage - StageDiffAdj].count(LoopVal))
    2640          78 :           PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
    2641             :         // Use the value defined by the Phi. We add one because we switch
    2642             :         // from looking at the loop value to the Phi definition.
    2643         133 :         else if (np > 0 && PrevStage == LastStageNum &&
    2644           7 :                  VRMap[PrevStage - np + 1].count(Def))
    2645          14 :           PhiOp2 = VRMap[PrevStage - np + 1][Def];
    2646             :         // Use the loop value defined in the kernel.
    2647         126 :         else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
    2648           8 :                  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
    2649          16 :           PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
    2650             :         // Use the value defined by the Phi, unless we're generating the first
    2651             :         // epilog and the Phi refers to a Phi in a different stage.
    2652         236 :         else if (VRMap[PrevStage - np].count(Def) &&
    2653         118 :                  (!LoopDefIsPhi || PrevStage != LastStageNum))
    2654         228 :           PhiOp2 = VRMap[PrevStage - np][Def];
    2655             :       }
    2656             : 
    2657             :       // Check if we can reuse an existing Phi. This occurs when a Phi
    2658             :       // references another Phi, and the other Phi is scheduled in an
    2659             :       // earlier stage. We can try to reuse an existing Phi up until the last
    2660             :       // stage of the current Phi.
    2661         312 :       if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
    2662          24 :         int LVNumStages = Schedule.getStagesForPhi(LoopVal);
    2663          12 :         int StageDiff = (StageScheduled - LoopValStage);
    2664          12 :         LVNumStages -= StageDiff;
    2665          12 :         if (LVNumStages > (int)np) {
    2666           0 :           NewReg = PhiOp2;
    2667           0 :           unsigned ReuseStage = CurStageNum;
    2668           0 :           if (Schedule.isLoopCarried(this, *PhiInst))
    2669           0 :             ReuseStage -= LVNumStages;
    2670             :           // Check if the Phi to reuse has been generated yet. If not, then
    2671             :           // there is nothing to reuse.
    2672           0 :           if (VRMap[ReuseStage].count(LoopVal)) {
    2673           0 :             NewReg = VRMap[ReuseStage][LoopVal];
    2674             : 
    2675           0 :             rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2676           0 :                                   &*BBI, Def, NewReg);
    2677             :             // Update the map with the new Phi name.
    2678           0 :             VRMap[CurStageNum - np][Def] = NewReg;
    2679           0 :             PhiOp2 = NewReg;
    2680           0 :             if (VRMap[LastStageNum - np - 1].count(LoopVal))
    2681           0 :               PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
    2682             : 
    2683           0 :             if (IsLast && np == NumPhis - 1)
    2684           0 :               replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2685           0 :             continue;
    2686             :           }
    2687          12 :         } else if (InKernel && StageDiff > 0 &&
    2688           0 :                    VRMap[CurStageNum - StageDiff - np].count(LoopVal))
    2689           0 :           PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
    2690             :       }
    2691             : 
    2692         624 :       const TargetRegisterClass *RC = MRI.getRegClass(Def);
    2693         312 :       NewReg = MRI.createVirtualRegister(RC);
    2694             : 
    2695             :       MachineInstrBuilder NewPhi =
    2696         936 :           BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
    2697         624 :                   TII->get(TargetOpcode::PHI), NewReg);
    2698         624 :       NewPhi.addReg(PhiOp1).addMBB(BB1);
    2699         624 :       NewPhi.addReg(PhiOp2).addMBB(BB2);
    2700         312 :       if (np == 0)
    2701         891 :         InstrMap[NewPhi] = &*BBI;
    2702             : 
    2703             :       // We define the Phis after creating the new pipelined code, so
    2704             :       // we need to rename the Phi values in scheduled instructions.
    2705             : 
    2706         312 :       unsigned PrevReg = 0;
    2707         312 :       if (InKernel && VRMap[PrevStage - np].count(LoopVal))
    2708         280 :         PrevReg = VRMap[PrevStage - np][LoopVal];
    2709         624 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
    2710             :                             Def, NewReg, PrevReg);
    2711             :       // If the Phi has been scheduled, use the new name for rewriting.
    2712         616 :       if (VRMap[CurStageNum - np].count(Def)) {
    2713          16 :         unsigned R = VRMap[CurStageNum - np][Def];
    2714           8 :         rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
    2715             :                               R, NewReg);
    2716             :       }
    2717             : 
    2718             :       // Check if we need to rename any uses that occurs after the loop. The
    2719             :       // register to replace depends on whether the Phi is scheduled in the
    2720             :       // epilog.
    2721         312 :       if (IsLast && np == NumPhis - 1)
    2722         135 :         replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2723             : 
    2724             :       // In the kernel, a dependent Phi uses the value from this Phi.
    2725         312 :       if (InKernel)
    2726         140 :         PhiOp2 = NewReg;
    2727             : 
    2728             :       // Update the map with the new Phi name.
    2729         624 :       VRMap[CurStageNum - np][Def] = NewReg;
    2730             :     }
    2731             : 
    2732           0 :     while (NumPhis++ < NumStages) {
    2733           0 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
    2734           0 :                             &*BBI, Def, NewReg, 0);
    2735             :     }
    2736             : 
    2737             :     // Check if we need to rename a Phi that has been eliminated due to
    2738             :     // scheduling.
    2739         299 :     if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
    2740           0 :       replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
    2741             :   }
    2742         173 : }
    2743             : 
    2744             : /// Generate Phis for the specified block in the generated pipelined code.
    2745             : /// These are new Phis needed because the definition is scheduled after the
    2746             : /// use in the pipelened sequence.
    2747         173 : void SwingSchedulerDAG::generatePhis(
    2748             :     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    2749             :     MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    2750             :     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    2751             :     bool IsLast) {
    2752             :   // Compute the stage number that contains the initial Phi value, and
    2753             :   // the Phi from the previous stage.
    2754         173 :   unsigned PrologStage = 0;
    2755         173 :   unsigned PrevStage = 0;
    2756         173 :   unsigned StageDiff = CurStageNum - LastStageNum;
    2757         173 :   bool InKernel = (StageDiff == 0);
    2758         173 :   if (InKernel) {
    2759          82 :     PrologStage = LastStageNum - 1;
    2760          82 :     PrevStage = CurStageNum;
    2761             :   } else {
    2762          91 :     PrologStage = LastStageNum - StageDiff;
    2763          91 :     PrevStage = LastStageNum + StageDiff - 1;
    2764             :   }
    2765             : 
    2766         173 :   for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
    2767         346 :                                    BBE = BB->instr_end();
    2768        1501 :        BBI != BBE; ++BBI) {
    2769        7208 :     for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
    2770        9104 :       MachineOperand &MO = BBI->getOperand(i);
    2771        9557 :       if (!MO.isReg() || !MO.isDef() ||
    2772        2944 :           !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    2773        7401 :         continue;
    2774             : 
    2775        2766 :       int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
    2776             :       assert(StageScheduled != -1 && "Expecting scheduled instruction.");
    2777         922 :       unsigned Def = MO.getReg();
    2778         922 :       unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
    2779             :       // An instruction scheduled in stage 0 and is used after the loop
    2780             :       // requires a phi in the epilog for the last definition from either
    2781             :       // the kernel or prolog.
    2782        1121 :       if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
    2783         199 :           hasUseAfterLoop(Def, BB, MRI))
    2784             :         NumPhis = 1;
    2785         922 :       if (!InKernel && (unsigned)StageScheduled > PrologStage)
    2786         141 :         continue;
    2787             : 
    2788        1562 :       unsigned PhiOp2 = VRMap[PrevStage][Def];
    2789         781 :       if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
    2790          26 :         if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
    2791             :           PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
    2792             :       // The number of Phis can't exceed the number of prolog stages. The
    2793             :       // prolog stage number is zero based.
    2794         781 :       if (NumPhis > PrologStage + 1 - StageScheduled)
    2795           3 :         NumPhis = PrologStage + 1 - StageScheduled;
    2796        1069 :       for (unsigned np = 0; np < NumPhis; ++np) {
    2797         576 :         unsigned PhiOp1 = VRMap[PrologStage][Def];
    2798         288 :         if (np <= PrologStage)
    2799         576 :           PhiOp1 = VRMap[PrologStage - np][Def];
    2800         288 :         if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
    2801         134 :           if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
    2802             :             PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
    2803         134 :           if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
    2804             :             PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
    2805             :         }
    2806         288 :         if (!InKernel)
    2807         314 :           PhiOp2 = VRMap[PrevStage - np][Def];
    2808             : 
    2809         576 :         const TargetRegisterClass *RC = MRI.getRegClass(Def);
    2810         288 :         unsigned NewReg = MRI.createVirtualRegister(RC);
    2811             : 
    2812             :         MachineInstrBuilder NewPhi =
    2813         864 :             BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
    2814         576 :                     TII->get(TargetOpcode::PHI), NewReg);
    2815         576 :         NewPhi.addReg(PhiOp1).addMBB(BB1);
    2816         576 :         NewPhi.addReg(PhiOp2).addMBB(BB2);
    2817         288 :         if (np == 0)
    2818         846 :           InstrMap[NewPhi] = &*BBI;
    2819             : 
    2820             :         // Rewrite uses and update the map. The actions depend upon whether
    2821             :         // we generating code for the kernel or epilog blocks.
    2822         288 :         if (InKernel) {
    2823         131 :           rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2824         131 :                                 &*BBI, PhiOp1, NewReg);
    2825         131 :           rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2826         131 :                                 &*BBI, PhiOp2, NewReg);
    2827             : 
    2828         131 :           PhiOp2 = NewReg;
    2829         262 :           VRMap[PrevStage - np - 1][Def] = NewReg;
    2830             :         } else {
    2831         314 :           VRMap[CurStageNum - np][Def] = NewReg;
    2832         157 :           if (np == NumPhis - 1)
    2833         308 :             rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
    2834         154 :                                   &*BBI, Def, NewReg);
    2835             :         }
    2836         288 :         if (IsLast && np == NumPhis - 1)
    2837         111 :           replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
    2838             :       }
    2839             :     }
    2840             :   }
    2841         173 : }
    2842             : 
    2843             : /// Remove instructions that generate values with no uses.
    2844             : /// Typically, these are induction variable operations that generate values
    2845             : /// used in the loop itself.  A dead instruction has a definition with
    2846             : /// no uses, or uses that occur in the original loop only.
    2847          82 : void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
    2848             :                                                MBBVectorTy &EpilogBBs) {
    2849             :   // For each epilog block, check that the value defined by each instruction
    2850             :   // is used.  If not, delete it.
    2851          82 :   for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
    2852          82 :                                      MBE = EpilogBBs.rend();
    2853         173 :        MBB != MBE; ++MBB)
    2854         182 :     for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
    2855         182 :                                                    ME = (*MBB)->instr_rend();
    2856         701 :          MI != ME;) {
    2857             :       // From DeadMachineInstructionElem. Don't delete inline assembly.
    2858        1221 :       if (MI->isInlineAsm()) {
    2859           1 :         ++MI;
    2860         280 :         continue;
    2861             :       }
    2862         609 :       bool SawStore = false;
    2863             :       // Check if it's safe to remove the instruction due to side effects.
    2864             :       // We can, and want to, remove Phis here.
    2865        1397 :       if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
    2866         153 :         ++MI;
    2867             :         continue;
    2868             :       }
    2869         456 :       bool used = true;
    2870        1080 :       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
    2871         912 :                                       MOE = MI->operands_end();
    2872        1080 :            MOI != MOE; ++MOI) {
    2873        2159 :         if (!MOI->isReg() || !MOI->isDef())
    2874             :           continue;
    2875         456 :         unsigned reg = MOI->getReg();
    2876         456 :         unsigned realUses = 0;
    2877         456 :         for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
    2878         456 :                                                EI = MRI.use_end();
    2879         456 :              UI != EI; ++UI) {
    2880             :           // Check if there are any uses that occur only in the original
    2881             :           // loop.  If so, that's not a real use.
    2882         330 :           if (UI->getParent()->getParent() != BB) {
    2883             :             realUses++;
    2884             :             used = true;
    2885             :             break;
    2886             :           }
    2887             :         }
    2888         456 :         if (realUses > 0)
    2889             :           break;
    2890             :         used = false;
    2891             :       }
    2892         582 :       if (!used) {
    2893         378 :         MI++->eraseFromParent();
    2894             :         continue;
    2895             :       }
    2896         330 :       ++MI;
    2897             :     }
    2898             :   // In the kernel block, check if we can remove a Phi that generates a value
    2899             :   // used in an instruction removed in the epilog block.
    2900         164 :   for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
    2901          82 :                                    BBE = KernelBB->getFirstNonPHI();
    2902         353 :        BBI != BBE;) {
    2903         271 :     MachineInstr *MI = &*BBI;
    2904         271 :     ++BBI;
    2905         271 :     unsigned reg = MI->getOperand(0).getReg();
    2906         542 :     if (MRI.use_begin(reg) == MRI.use_end()) {
    2907           0 :       MI->eraseFromParent();
    2908             :     }
    2909             :   }
    2910          82 : }
    2911             : 
    2912             : /// For loop carried definitions, we split the lifetime of a virtual register
    2913             : /// that has uses past the definition in the next iteration. A copy with a new
    2914             : /// virtual register is inserted before the definition, which helps with
    2915             : /// generating a better register assignment.
    2916             : ///
    2917             : ///   v1 = phi(a, v2)     v1 = phi(a, v2)
    2918             : ///   v2 = phi(b, v3)     v2 = phi(b, v3)
    2919             : ///   v3 = ..             v4 = copy v1
    2920             : ///   .. = V1             v3 = ..
    2921             : ///                       .. = v4
    2922          82 : void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
    2923             :                                        MBBVectorTy &EpilogBBs,
    2924             :                                        SMSchedule &Schedule) {
    2925          82 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    2926         164 :   for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
    2927          82 :                                    BBF = KernelBB->getFirstNonPHI();
    2928         353 :        BBI != BBF; ++BBI) {
    2929         271 :     unsigned Def = BBI->getOperand(0).getReg();
    2930             :     // Check for any Phi definition that used as an operand of another Phi
    2931             :     // in the same block.
    2932         271 :     for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
    2933         271 :                                                  E = MRI.use_instr_end();
    2934         753 :          I != E; ++I) {
    2935         596 :       if (I->isPHI() && I->getParent() == KernelBB) {
    2936             :         // Get the loop carried definition.
    2937          28 :         unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
    2938          14 :         if (!LCDef)
    2939             :           continue;
    2940          14 :         MachineInstr *MI = MRI.getVRegDef(LCDef);
    2941          28 :         if (!MI || MI->getParent() != KernelBB || MI->isPHI())
    2942             :           continue;
    2943             :         // Search through the rest of the block looking for uses of the Phi
    2944             :         // definition. If one occurs, then split the lifetime.
    2945          14 :         unsigned SplitReg = 0;
    2946             :         for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
    2947         282 :                                     KernelBB->instr_end()))
    2948         113 :           if (BBJ.readsRegister(Def)) {
    2949             :             // We split the lifetime when we find the first use.
    2950          12 :             if (SplitReg == 0) {
    2951          12 :               SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
    2952          12 :               BuildMI(*KernelBB, MI, MI->getDebugLoc(),
    2953          18 :                       TII->get(TargetOpcode::COPY), SplitReg)
    2954           6 :                   .addReg(Def);
    2955             :             }
    2956          12 :             BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
    2957             :           }
    2958          14 :         if (!SplitReg)
    2959             :           continue;
    2960             :         // Search through each of the epilog blocks for any uses to be renamed.
    2961          24 :         for (auto &Epilog : EpilogBBs)
    2962         136 :           for (auto &I : *Epilog)
    2963          56 :             if (I.readsRegister(Def))
    2964          12 :               I.substituteRegister(Def, SplitReg, 0, *TRI);
    2965             :         break;
    2966             :       }
    2967             :     }
    2968             :   }
    2969          82 : }
    2970             : 
    2971             : /// Remove the incoming block from the Phis in a basic block.
    2972          17 : static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
    2973         147 :   for (MachineInstr &MI : *BB) {
    2974             :     if (!MI.isPHI())
    2975             :       break;
    2976          33 :     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
    2977          66 :       if (MI.getOperand(i + 1).getMBB() == Incoming) {
    2978          31 :         MI.RemoveOperand(i + 1);
    2979          31 :         MI.RemoveOperand(i);
    2980          31 :         break;
    2981             :       }
    2982             :   }
    2983          17 : }
    2984             : 
    2985             : /// Create branches from each prolog basic block to the appropriate epilog
    2986             : /// block.  These edges are needed if the loop ends before reaching the
    2987             : /// kernel.
    2988          82 : void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
    2989             :                                     MachineBasicBlock *KernelBB,
    2990             :                                     MBBVectorTy &EpilogBBs,
    2991             :                                     SMSchedule &Schedule, ValueMapTy *VRMap) {
    2992             :   assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
    2993          82 :   MachineInstr *IndVar = Pass.LI.LoopInductionVar;
    2994          82 :   MachineInstr *Cmp = Pass.LI.LoopCompare;
    2995          82 :   MachineBasicBlock *LastPro = KernelBB;
    2996          82 :   MachineBasicBlock *LastEpi = KernelBB;
    2997             : 
    2998             :   // Start from the blocks connected to the kernel and work "out"
    2999             :   // to the first prolog and the last epilog blocks.
    3000         164 :   SmallVector<MachineInstr *, 4> PrevInsts;
    3001         164 :   unsigned MaxIter = PrologBBs.size() - 1;
    3002          82 :   unsigned LC = UINT_MAX;
    3003          82 :   unsigned LCMin = UINT_MAX;
    3004         173 :   for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
    3005             :     // Add branches to the prolog that go to the corresponding
    3006             :     // epilog, and the fall-thru prolog/kernel block.
    3007         182 :     MachineBasicBlock *Prolog = PrologBBs[j];
    3008         182 :     MachineBasicBlock *Epilog = EpilogBBs[i];
    3009             :     // We've executed one iteration, so decrement the loop count and check for
    3010             :     // the loop end.
    3011         182 :     SmallVector<MachineOperand, 4> Cond;
    3012             :     // Check if the LOOP0 has already been removed. If so, then there is no need
    3013             :     // to reduce the trip count.
    3014          91 :     if (LC != 0)
    3015         182 :       LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
    3016          91 :                                 MaxIter);
    3017             : 
    3018             :     // Record the value of the first trip count, which is used to determine if
    3019             :     // branches and blocks can be removed for constant trip counts.
    3020          91 :     if (LCMin == UINT_MAX)
    3021          82 :       LCMin = LC;
    3022             : 
    3023          91 :     unsigned numAdded = 0;
    3024          91 :     if (TargetRegisterInfo::isVirtualRegister(LC)) {
    3025          74 :       Prolog->addSuccessor(Epilog);
    3026         296 :       numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
    3027          17 :     } else if (j >= LCMin) {
    3028           1 :       Prolog->addSuccessor(Epilog);
    3029           1 :       Prolog->removeSuccessor(LastPro);
    3030           1 :       LastEpi->removeSuccessor(Epilog);
    3031           4 :       numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
    3032           1 :       removePhis(Epilog, LastEpi);
    3033             :       // Remove the blocks that are no longer referenced.
    3034           1 :       if (LastPro != LastEpi) {
    3035           0 :         LastEpi->clear();
    3036           0 :         LastEpi->eraseFromParent();
    3037             :       }
    3038           1 :       LastPro->clear();
    3039           1 :       LastPro->eraseFromParent();
    3040             :     } else {
    3041          64 :       numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
    3042          16 :       removePhis(Epilog, Prolog);
    3043             :     }
    3044          91 :     LastPro = Prolog;
    3045          91 :     LastEpi = Epilog;
    3046         256 :     for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
    3047          91 :                                                    E = Prolog->instr_rend();
    3048         256 :          I != E && numAdded > 0; ++I, --numAdded)
    3049         165 :       updateInstruction(&*I, false, j, 0, Schedule, VRMap);
    3050             :   }
    3051          82 : }
    3052             : 
    3053             : /// Return true if we can compute the amount the instruction changes
    3054             : /// during each iteration. Set Delta to the amount of the change.
    3055       35801 : bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
    3056       35801 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    3057             :   unsigned BaseReg;
    3058             :   int64_t Offset;
    3059       35801 :   if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
    3060             :     return false;
    3061             : 
    3062        1324 :   MachineRegisterInfo &MRI = MF.getRegInfo();
    3063             :   // Check if there is a Phi. If so, get the definition in the loop.
    3064        1324 :   MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
    3065        1324 :   if (BaseDef && BaseDef->isPHI()) {
    3066        2266 :     BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
    3067        1133 :     BaseDef = MRI.getVRegDef(BaseReg);
    3068             :   }
    3069        1324 :   if (!BaseDef)
    3070             :     return false;
    3071             : 
    3072        1317 :   int D = 0;
    3073        1317 :   if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
    3074             :     return false;
    3075             : 
    3076        1123 :   Delta = D;
    3077             :   return true;
    3078             : }
    3079             : 
    3080             : /// Update the memory operand with a new offset when the pipeliner
    3081             : /// generates a new copy of the instruction that refers to a
    3082             : /// different memory location.
    3083         982 : void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
    3084             :                                           MachineInstr &OldMI, unsigned Num) {
    3085         982 :   if (Num == 0)
    3086             :     return;
    3087             :   // If the instruction has memory operands, then adjust the offset
    3088             :   // when the instruction appears in different stages.
    3089        1108 :   unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
    3090         554 :   if (NumRefs == 0)
    3091             :     return;
    3092         189 :   MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
    3093         189 :   unsigned Refs = 0;
    3094         414 :   for (MachineMemOperand *MMO : NewMI.memoperands()) {
    3095         900 :     if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
    3096         217 :         (!MMO->getValue())) {
    3097           8 :       NewMemRefs[Refs++] = MMO;
    3098           8 :       continue;
    3099             :     }
    3100             :     unsigned Delta;
    3101         217 :     if (computeDelta(OldMI, Delta)) {
    3102          77 :       int64_t AdjOffset = Delta * Num;
    3103         154 :       NewMemRefs[Refs++] =
    3104         154 :           MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
    3105             :     } else
    3106         140 :       NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
    3107             :   }
    3108         189 :   NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
    3109             : }
    3110             : 
    3111             : /// Clone the instruction for the new pipelined loop and update the
    3112             : /// memory operands, if needed.
    3113         627 : MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
    3114             :                                             unsigned CurStageNum,
    3115             :                                             unsigned InstStageNum) {
    3116         627 :   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
    3117             :   // Check for tied operands in inline asm instructions. This should be handled
    3118             :   // elsewhere, but I'm not sure of the best solution.
    3119         627 :   if (OldMI->isInlineAsm())
    3120           8 :     for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
    3121          16 :       const auto &MO = OldMI->getOperand(i);
    3122          10 :       if (MO.isReg() && MO.isUse())
    3123             :         break;
    3124             :       unsigned UseIdx;
    3125           6 :       if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
    3126           0 :         NewMI->tieOperands(i, UseIdx);
    3127             :     }
    3128         627 :   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
    3129         627 :   return NewMI;
    3130             : }
    3131             : 
    3132             : /// Clone the instruction for the new pipelined loop. If needed, this
    3133             : /// function updates the instruction using the values saved in the
    3134             : /// InstrChanges structure.
    3135         355 : MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
    3136             :                                                      unsigned CurStageNum,
    3137             :                                                      unsigned InstStageNum,
    3138             :                                                      SMSchedule &Schedule) {
    3139         355 :   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
    3140             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
    3141         710 :       InstrChanges.find(getSUnit(OldMI));
    3142        1065 :   if (It != InstrChanges.end()) {
    3143           7 :     std::pair<unsigned, int64_t> RegAndOffset = It->second;
    3144             :     unsigned BasePos, OffsetPos;
    3145           7 :     if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
    3146           0 :       return nullptr;
    3147          14 :     int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
    3148           7 :     MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
    3149          14 :     if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
    3150           7 :       NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
    3151          21 :     NewMI->getOperand(OffsetPos).setImm(NewOffset);
    3152             :   }
    3153         355 :   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
    3154         355 :   return NewMI;
    3155             : }
    3156             : 
    3157             : /// Update the machine instruction with new virtual registers.  This
    3158             : /// function may change the defintions and/or uses.
    3159        1311 : void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
    3160             :                                           unsigned CurStageNum,
    3161             :                                           unsigned InstrStageNum,
    3162             :                                           SMSchedule &Schedule,
    3163             :                                           ValueMapTy *VRMap) {
    3164        5630 :   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
    3165        8638 :     MachineOperand &MO = NewMI->getOperand(i);
    3166        7636 :     if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3167        1624 :       continue;
    3168        2695 :     unsigned reg = MO.getReg();
    3169        2695 :     if (MO.isDef()) {
    3170             :       // Create a new virtual register for the definition.
    3171        1844 :       const TargetRegisterClass *RC = MRI.getRegClass(reg);
    3172         922 :       unsigned NewReg = MRI.createVirtualRegister(RC);
    3173         922 :       MO.setReg(NewReg);
    3174        1844 :       VRMap[CurStageNum][reg] = NewReg;
    3175         922 :       if (LastDef)
    3176         122 :         replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
    3177        1773 :     } else if (MO.isUse()) {
    3178        1773 :       MachineInstr *Def = MRI.getVRegDef(reg);
    3179             :       // Compute the stage that contains the last definition for instruction.
    3180        3546 :       int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
    3181        1773 :       unsigned StageNum = CurStageNum;
    3182        1773 :       if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
    3183             :         // Compute the difference in stages between the defintion and the use.
    3184         181 :         unsigned StageDiff = (InstrStageNum - DefStageNum);
    3185             :         // Make an adjustment to get the last definition.
    3186         181 :         StageNum -= StageDiff;
    3187             :       }
    3188        2898 :       if (VRMap[StageNum].count(reg))
    3189        1296 :         MO.setReg(VRMap[StageNum][reg]);
    3190             :     }
    3191             :   }
    3192        1311 : }
    3193             : 
    3194             : /// Return the instruction in the loop that defines the register.
    3195             : /// If the definition is a Phi, then follow the Phi operand to
    3196             : /// the instruction in the loop.
    3197          16 : MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
    3198          32 :   SmallPtrSet<MachineInstr *, 8> Visited;
    3199          16 :   MachineInstr *Def = MRI.getVRegDef(Reg);
    3200             :   while (Def->isPHI()) {
    3201           9 :     if (!Visited.insert(Def).second)
    3202             :       break;
    3203          18 :     for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
    3204          36 :       if (Def->getOperand(i + 1).getMBB() == BB) {
    3205          18 :         Def = MRI.getVRegDef(Def->getOperand(i).getReg());
    3206             :         break;
    3207             :       }
    3208             :   }
    3209          32 :   return Def;
    3210             : }
    3211             : 
    3212             : /// Return the new name for the value from the previous stage.
    3213         165 : unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
    3214             :                                           unsigned LoopVal, unsigned LoopStage,
    3215             :                                           ValueMapTy *VRMap,
    3216             :                                           MachineBasicBlock *BB) {
    3217         165 :   unsigned PrevVal = 0;
    3218         165 :   if (StageNum > PhiStage) {
    3219          20 :     MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
    3220          40 :     if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
    3221             :       // The name is defined in the previous stage.
    3222          32 :       PrevVal = VRMap[StageNum - 1][LoopVal];
    3223           8 :     else if (VRMap[StageNum].count(LoopVal))
    3224             :       // The previous name is defined in the current stage when the instruction
    3225             :       // order is swapped.
    3226           0 :       PrevVal = VRMap[StageNum][LoopVal];
    3227           4 :     else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
    3228             :       // The loop value hasn't yet been scheduled.
    3229           0 :       PrevVal = LoopVal;
    3230           4 :     else if (StageNum == PhiStage + 1)
    3231             :       // The loop value is another phi, which has not been scheduled.
    3232             :       PrevVal = getInitPhiReg(*LoopInst, BB);
    3233           0 :     else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
    3234             :       // The loop value is another phi, which has been scheduled.
    3235           0 :       PrevVal =
    3236           0 :           getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
    3237             :                         LoopStage, VRMap, BB);
    3238             :   }
    3239         165 :   return PrevVal;
    3240             : }
    3241             : 
    3242             : /// Rewrite the Phi values in the specified block to use the mappings
    3243             : /// from the initial operand. Once the Phi is scheduled, we switch
    3244             : /// to using the loop value instead of the Phi value, so those names
    3245             : /// do not need to be rewritten.
    3246          91 : void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
    3247             :                                          unsigned StageNum,
    3248             :                                          SMSchedule &Schedule,
    3249             :                                          ValueMapTy *VRMap,
    3250             :                                          InstrMapTy &InstrMap) {
    3251         273 :   for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
    3252          91 :                                    BBE = BB->getFirstNonPHI();
    3253         255 :        BBI != BBE; ++BBI) {
    3254         164 :     unsigned InitVal = 0;
    3255         164 :     unsigned LoopVal = 0;
    3256         492 :     getPhiRegs(*BBI, BB, InitVal, LoopVal);
    3257         164 :     unsigned PhiDef = BBI->getOperand(0).getReg();
    3258             : 
    3259             :     unsigned PhiStage =
    3260         328 :         (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
    3261             :     unsigned LoopStage =
    3262         328 :         (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
    3263         328 :     unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
    3264         164 :     if (NumPhis > StageNum)
    3265           7 :       NumPhis = StageNum;
    3266         329 :     for (unsigned np = 0; np <= NumPhis; ++np) {
    3267             :       unsigned NewVal =
    3268         165 :           getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
    3269         165 :       if (!NewVal)
    3270         145 :         NewVal = InitVal;
    3271         165 :       rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
    3272             :                             PhiDef, NewVal);
    3273             :     }
    3274             :   }
    3275          91 : }
    3276             : 
    3277             : /// Rewrite a previously scheduled instruction to use the register value
    3278             : /// from the new instruction. Make sure the instruction occurs in the
    3279             : /// basic block, and we don't change the uses in the new instruction.
    3280         903 : void SwingSchedulerDAG::rewriteScheduledInstr(
    3281             :     MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
    3282             :     unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
    3283             :     unsigned NewReg, unsigned PrevReg) {
    3284        1806 :   bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
    3285        1806 :   int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
    3286             :   // Rewrite uses that have been scheduled already to use the new
    3287             :   // Phi register.
    3288         903 :   for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
    3289         903 :                                          EI = MRI.use_end();
    3290        3181 :        UI != EI;) {
    3291        2278 :     MachineOperand &UseOp = *UI;
    3292        2278 :     MachineInstr *UseMI = UseOp.getParent();
    3293        2278 :     ++UI;
    3294        2278 :     if (UseMI->getParent() != BB)
    3295        2555 :       continue;
    3296             :     if (UseMI->isPHI()) {
    3297         536 :       if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
    3298         262 :         continue;
    3299          27 :       if (getLoopPhiReg(*UseMI, BB) != OldReg)
    3300          21 :         continue;
    3301             :     }
    3302         859 :     InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
    3303             :     assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
    3304        1718 :     SUnit *OrigMISU = getSUnit(OrigInstr->second);
    3305         859 :     int StageSched = Schedule.stageScheduled(OrigMISU);
    3306         859 :     int CycleSched = Schedule.cycleScheduled(OrigMISU);
    3307         859 :     unsigned ReplaceReg = 0;
    3308             :     // This is the stage for the scheduled instruction.
    3309         859 :     if (StagePhi == StageSched && Phi->isPHI()) {
    3310        1566 :       int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
    3311         522 :       if (PrevReg && InProlog)
    3312             :         ReplaceReg = PrevReg;
    3313         522 :       else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
    3314           0 :                (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
    3315             :         ReplaceReg = PrevReg;
    3316             :       else
    3317             :         ReplaceReg = NewReg;
    3318             :     }
    3319             :     // The scheduled instruction occurs before the scheduled Phi, and the
    3320             :     // Phi is not loop carried.
    3321        1165 :     if (!InProlog && StagePhi + 1 == StageSched &&
    3322         306 :         !Schedule.isLoopCarried(this, *Phi))
    3323             :       ReplaceReg = NewReg;
    3324         859 :     if (StagePhi > StageSched && Phi->isPHI())
    3325             :       ReplaceReg = NewReg;
    3326        1145 :     if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
    3327             :       ReplaceReg = NewReg;
    3328         859 :     if (ReplaceReg) {
    3329        1628 :       MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
    3330         814 :       UseOp.setReg(ReplaceReg);
    3331             :     }
    3332             :   }
    3333         903 : }
    3334             : 
    3335             : /// Check if we can change the instruction to use an offset value from the
    3336             : /// previous iteration. If so, return true and set the base and offset values
    3337             : /// so that we can rewrite the load, if necessary.
    3338             : ///   v1 = Phi(v0, v3)
    3339             : ///   v2 = load v1, 0
    3340             : ///   v3 = post_store v1, 4, x
    3341             : /// This function enables the load to be rewritten as v2 = load v3, 4.
    3342        1361 : bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
    3343             :                                               unsigned &BasePos,
    3344             :                                               unsigned &OffsetPos,
    3345             :                                               unsigned &NewBase,
    3346             :                                               int64_t &Offset) {
    3347             :   // Get the load instruction.
    3348        1361 :   if (TII->isPostIncrement(*MI))
    3349             :     return false;
    3350             :   unsigned BasePosLd, OffsetPosLd;
    3351        1295 :   if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
    3352             :     return false;
    3353         310 :   unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
    3354             : 
    3355             :   // Look for the Phi instruction.
    3356         155 :   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
    3357         155 :   MachineInstr *Phi = MRI.getVRegDef(BaseReg);
    3358         155 :   if (!Phi || !Phi->isPHI())
    3359             :     return false;
    3360             :   // Get the register defined in the loop block.
    3361         158 :   unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
    3362          79 :   if (!PrevReg)
    3363             :     return false;
    3364             : 
    3365             :   // Check for the post-increment load/store instruction.
    3366          79 :   MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
    3367          79 :   if (!PrevDef || PrevDef == MI)
    3368             :     return false;
    3369             : 
    3370          79 :   if (!TII->isPostIncrement(*PrevDef))
    3371             :     return false;
    3372             : 
    3373          10 :   unsigned BasePos1 = 0, OffsetPos1 = 0;
    3374          10 :   if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
    3375             :     return false;
    3376             : 
    3377             :   // Make sure offset values are both positive or both negative.
    3378          20 :   int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
    3379          20 :   int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
    3380          10 :   if ((LoadOffset >= 0) != (StoreOffset >= 0))
    3381             :     return false;
    3382             : 
    3383             :   // Set the return value once we determine that we return true.
    3384          10 :   BasePos = BasePosLd;
    3385          10 :   OffsetPos = OffsetPosLd;
    3386          10 :   NewBase = PrevReg;
    3387          10 :   Offset = StoreOffset;
    3388             :   return true;
    3389             : }
    3390             : 
    3391             : /// Apply changes to the instruction if needed. The changes are need
    3392             : /// to improve the scheduling and depend up on the final schedule.
    3393         720 : MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
    3394             :                                                   SMSchedule &Schedule,
    3395             :                                                   bool UpdateDAG) {
    3396        1440 :   SUnit *SU = getSUnit(MI);
    3397             :   DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
    3398         720 :       InstrChanges.find(SU);
    3399        2160 :   if (It != InstrChanges.end()) {
    3400           9 :     std::pair<unsigned, int64_t> RegAndOffset = It->second;
    3401             :     unsigned BasePos, OffsetPos;
    3402           9 :     if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
    3403           7 :       return nullptr;
    3404          18 :     unsigned BaseReg = MI->getOperand(BasePos).getReg();
    3405           9 :     MachineInstr *LoopDef = findDefInLoop(BaseReg);
    3406          18 :     int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
    3407          27 :     int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
    3408           9 :     int BaseStageNum = Schedule.stageScheduled(SU);
    3409           9 :     int BaseCycleNum = Schedule.cycleScheduled(SU);
    3410           9 :     if (BaseStageNum < DefStageNum) {
    3411           7 :       MachineInstr *NewMI = MF.CloneMachineInstr(MI);
    3412           7 :       int OffsetDiff = DefStageNum - BaseStageNum;
    3413           7 :       if (DefCycleNum < BaseCycleNum) {
    3414           0 :         NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
    3415           0 :         if (OffsetDiff > 0)
    3416           0 :           --OffsetDiff;
    3417             :       }
    3418             :       int64_t NewOffset =
    3419          14 :           MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
    3420          21 :       NewMI->getOperand(OffsetPos).setImm(NewOffset);
    3421           7 :       if (UpdateDAG) {
    3422          14 :         SU->setInstr(NewMI);
    3423          14 :         MISUnitMap[NewMI] = SU;
    3424             :       }
    3425           7 :       NewMIs.insert(NewMI);
    3426           7 :       return NewMI;
    3427             :     }
    3428             :   }
    3429             :   return nullptr;
    3430             : }
    3431             : 
    3432             : /// Return true for an order dependence that is loop carried potentially.
    3433             : /// An order dependence is loop carried if the destination defines a value
    3434             : /// that may be used by the source in a subsequent iteration.
    3435       54557 : bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
    3436             :                                            bool isSucc) {
    3437      107251 :   if (!isOrder(Source, Dep) || Dep.isArtificial())
    3438             :     return false;
    3439             : 
    3440       52694 :   if (!SwpPruneLoopCarried)
    3441             :     return true;
    3442             : 
    3443       52694 :   MachineInstr *SI = Source->getInstr();
    3444       52694 :   MachineInstr *DI = Dep.getSUnit()->getInstr();
    3445       52694 :   if (!isSucc)
    3446             :     std::swap(SI, DI);
    3447             :   assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
    3448             : 
    3449             :   // Assume ordered loads and stores may have a loop carried dependence.
    3450      158076 :   if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
    3451      158045 :       SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
    3452             :     return true;
    3453             : 
    3454             :   // Only chain dependences between a load and store can be loop carried.
    3455       52646 :   if (!DI->mayStore() || !SI->mayLoad())
    3456             :     return false;
    3457             : 
    3458             :   unsigned DeltaS, DeltaD;
    3459       34661 :   if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
    3460             :     return true;
    3461             : 
    3462             :   unsigned BaseRegS, BaseRegD;
    3463             :   int64_t OffsetS, OffsetD;
    3464         123 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    3465         246 :   if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
    3466         123 :       !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
    3467             :     return true;
    3468             : 
    3469         123 :   if (BaseRegS != BaseRegD)
    3470             :     return true;
    3471             : 
    3472          74 :   uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
    3473          74 :   uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
    3474             : 
    3475             :   // This is the main test, which checks the offset values and the loop
    3476             :   // increment value to determine if the accesses may be loop carried.
    3477          74 :   if (OffsetS >= OffsetD)
    3478          48 :     return OffsetS + AccessSizeS > DeltaS;
    3479             :   else
    3480          26 :     return OffsetD + AccessSizeD > DeltaD;
    3481             : 
    3482             :   return true;
    3483             : }
    3484             : 
    3485             : void SwingSchedulerDAG::postprocessDAG() {
    3486         678 :   for (auto &M : Mutations)
    3487         226 :     M->apply(this);
    3488             : }
    3489             : 
    3490             : /// Try to schedule the node at the specified StartCycle and continue
    3491             : /// until the node is schedule or the EndCycle is reached.  This function
    3492             : /// returns true if the node is scheduled.  This routine may search either
    3493             : /// forward or backward for a place to insert the instruction based upon
    3494             : /// the relative values of StartCycle and EndCycle.
    3495        1325 : bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
    3496        1325 :   bool forward = true;
    3497        1325 :   if (StartCycle > EndCycle)
    3498         308 :     forward = false;
    3499             : 
    3500             :   // The terminating condition depends on the direction.
    3501        1325 :   int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
    3502        2975 :   for (int curCycle = StartCycle; curCycle != termCycle;
    3503             :        forward ? ++curCycle : --curCycle) {
    3504             : 
    3505             :     // Add the already scheduled instructions at the specified cycle to the DFA.
    3506        4268 :     Resources->clearResources();
    3507        5145 :     for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
    3508        8156 :          checkCycle <= LastCycle; checkCycle += II) {
    3509        6022 :       std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
    3510             : 
    3511        3011 :       for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
    3512             :                                          E = cycleInstrs.end();
    3513        6983 :            I != E; ++I) {
    3514        7944 :         if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
    3515         693 :           continue;
    3516             :         assert(Resources->canReserveResources(*(*I)->getInstr()) &&
    3517             :                "These instructions have already been scheduled.");
    3518        6558 :         Resources->reserveResources(*(*I)->getInstr());
    3519             :       }
    3520             :     }
    3521        6143 :     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
    3522        3750 :         Resources->canReserveResources(*SU->getInstr())) {
    3523             :       DEBUG({
    3524             :         dbgs() << "\tinsert at cycle " << curCycle << " ";
    3525             :         SU->getInstr()->dump();
    3526             :       });
    3527             : 
    3528        2600 :       ScheduledInstrs[curCycle].push_back(SU);
    3529        3900 :       InstrToCycle.insert(std::make_pair(SU, curCycle));
    3530        1300 :       if (curCycle > LastCycle)
    3531         325 :         LastCycle = curCycle;
    3532        1300 :       if (curCycle < FirstCycle)
    3533           9 :         FirstCycle = curCycle;
    3534        1300 :       return true;
    3535             :     }
    3536             :     DEBUG({
    3537             :       dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
    3538             :       SU->getInstr()->dump();
    3539             :     });
    3540             :   }
    3541          25 :   return false;
    3542             : }
    3543             : 
    3544             : // Return the cycle of the earliest scheduled instruction in the chain.
    3545          11 : int SMSchedule::earliestCycleInChain(const SDep &Dep) {
    3546          22 :   SmallPtrSet<SUnit *, 8> Visited;
    3547          22 :   SmallVector<SDep, 8> Worklist;
    3548          11 :   Worklist.push_back(Dep);
    3549          11 :   int EarlyCycle = INT_MAX;
    3550         248 :   while (!Worklist.empty()) {
    3551         237 :     const SDep &Cur = Worklist.pop_back_val();
    3552         237 :     SUnit *PrevSU = Cur.getSUnit();
    3553         237 :     if (Visited.count(PrevSU))
    3554          97 :       continue;
    3555         420 :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
    3556         280 :     if (it == InstrToCycle.end())
    3557          15 :       continue;
    3558         250 :     EarlyCycle = std::min(EarlyCycle, it->second);
    3559         728 :     for (const auto &PI : PrevSU->Preds)
    3560         353 :       if (SwingSchedulerDAG::isOrder(PrevSU, PI))
    3561         226 :         Worklist.push_back(PI);
    3562         125 :     Visited.insert(PrevSU);
    3563             :   }
    3564          22 :   return EarlyCycle;
    3565             : }
    3566             : 
    3567             : // Return the cycle of the latest scheduled instruction in the chain.
    3568          86 : int SMSchedule::latestCycleInChain(const SDep &Dep) {
    3569         172 :   SmallPtrSet<SUnit *, 8> Visited;
    3570         172 :   SmallVector<SDep, 8> Worklist;
    3571          86 :   Worklist.push_back(Dep);
    3572          86 :   int LateCycle = INT_MIN;
    3573         671 :   while (!Worklist.empty()) {
    3574         585 :     const SDep &Cur = Worklist.pop_back_val();
    3575         585 :     SUnit *SuccSU = Cur.getSUnit();
    3576         585 :     if (Visited.count(SuccSU))
    3577         136 :       continue;
    3578        1347 :     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
    3579         898 :     if (it == InstrToCycle.end())
    3580         158 :       continue;
    3581         582 :     LateCycle = std::max(LateCycle, it->second);
    3582        1687 :     for (const auto &SI : SuccSU->Succs)
    3583         814 :       if (SwingSchedulerDAG::isOrder(SuccSU, SI))
    3584         499 :         Worklist.push_back(SI);
    3585         291 :     Visited.insert(SuccSU);
    3586             :   }
    3587         172 :   return LateCycle;
    3588             : }
    3589             : 
    3590             : /// If an instruction has a use that spans multiple iterations, then
    3591             : /// return true. These instructions are characterized by having a back-ege
    3592             : /// to a Phi, which contains a reference to another Phi.
    3593       19315 : static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
    3594       89416 :   for (auto &P : SU->Preds)
    3595       33491 :     if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
    3596       14665 :       for (auto &S : P.getSUnit()->Succs)
    3597        9305 :         if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
    3598             :           return P.getSUnit();
    3599             :   return nullptr;
    3600             : }
    3601             : 
    3602             : /// Compute the scheduling start slot for the instruction.  The start slot
    3603             : /// depends on any predecessor or successor nodes scheduled already.
    3604        1350 : void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
    3605             :                               int *MinEnd, int *MaxStart, int II,
    3606             :                               SwingSchedulerDAG *DAG) {
    3607             :   // Iterate over each instruction that has been scheduled already.  The start
    3608             :   // slot computuation depends on whether the previously scheduled instruction
    3609             :   // is a predecessor or successor of the specified instruction.
    3610       12856 :   for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
    3611             : 
    3612             :     // Iterate over each instruction in the current cycle.
    3613       58491 :     for (SUnit *I : getInstructions(cycle)) {
    3614             :       // Because we're processing a DAG for the dependences, we recognize
    3615             :       // the back-edge in recurrences by anti dependences.
    3616       44249 :       for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
    3617       38630 :         const SDep &Dep = SU->Preds[i];
    3618       19315 :         if (Dep.getSUnit() == I) {
    3619         947 :           if (!DAG->isBackedge(SU, Dep)) {
    3620        1840 :             int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
    3621        2760 :                              DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
    3622         920 :             *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
    3623         920 :             if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
    3624          11 :               int End = earliestCycleInChain(Dep) + (II - 1);
    3625          11 :               *MinEnd = std::min(*MinEnd, End);
    3626             :             }
    3627             :           } else {
    3628          54 :             int LateStart = cycle - DAG->getLatency(SU, Dep) +
    3629          81 :                             DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
    3630          27 :             *MinLateStart = std::min(*MinLateStart, LateStart);
    3631             :           }
    3632             :         }
    3633             :         // For instruction that requires multiple iterations, make sure that
    3634             :         // the dependent instruction is not scheduled past the definition.
    3635       19315 :         SUnit *BE = multipleIterations(I, DAG);
    3636       19471 :         if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
    3637           8 :             !SU->isPred(I))
    3638           8 :           *MinLateStart = std::min(*MinLateStart, cycle);
    3639             :       }
    3640       42452 :       for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
    3641       52554 :         if (SU->Succs[i].getSUnit() == I) {
    3642        1720 :           const SDep &Dep = SU->Succs[i];
    3643         860 :           if (!DAG->isBackedge(SU, Dep)) {
    3644        1320 :             int LateStart = cycle - DAG->getLatency(SU, Dep) +
    3645        1980 :                             DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
    3646         660 :             *MinLateStart = std::min(*MinLateStart, LateStart);
    3647         660 :             if (DAG->isLoopCarriedOrder(SU, Dep)) {
    3648          86 :               int Start = latestCycleInChain(Dep) + 1 - II;
    3649          86 :               *MaxStart = std::max(*MaxStart, Start);
    3650             :             }
    3651             :           } else {
    3652         400 :             int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
    3653         600 :                              DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
    3654         200 :             *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
    3655             :           }
    3656             :         }
    3657             :     }
    3658             :   }
    3659        1350 : }
    3660             : 
    3661             : /// Order the instructions within a cycle so that the definitions occur
    3662             : /// before the uses. Returns true if the instruction is added to the start
    3663             : /// of the list, or false if added to the end.
    3664         808 : bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
    3665             :                                  std::deque<SUnit *> &Insts) {
    3666         808 :   MachineInstr *MI = SU->getInstr();
    3667         808 :   bool OrderBeforeUse = false;
    3668         808 :   bool OrderAfterDef = false;
    3669         808 :   bool OrderBeforeDef = false;
    3670         808 :   unsigned MoveDef = 0;
    3671         808 :   unsigned MoveUse = 0;
    3672         808 :   int StageInst1 = stageScheduled(SU);
    3673             : 
    3674         808 :   unsigned Pos = 0;
    3675        3000 :   for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
    3676             :        ++I, ++Pos) {
    3677             :     // Relative order of Phis does not matter.
    3678         157 :     if (MI->isPHI() && (*I)->getInstr()->isPHI())
    3679          76 :       continue;
    3680        5184 :     for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    3681        3952 :       MachineOperand &MO = MI->getOperand(i);
    3682        3977 :       if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3683         493 :         continue;
    3684        1483 :       unsigned Reg = MO.getReg();
    3685             :       unsigned BasePos, OffsetPos;
    3686        1483 :       if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
    3687         750 :         if (MI->getOperand(BasePos).getReg() == Reg)
    3688         412 :           if (unsigned NewReg = SSD->getInstrBaseReg(SU))
    3689          16 :             Reg = NewReg;
    3690             :       bool Reads, Writes;
    3691        1483 :       std::tie(Reads, Writes) =
    3692        4449 :           (*I)->getInstr()->readsWritesVirtualRegister(Reg);
    3693        1483 :       if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
    3694             :         OrderBeforeUse = true;
    3695             :         MoveUse = Pos;
    3696        1451 :       } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
    3697             :         // Add the instruction after the scheduled instruction.
    3698             :         OrderAfterDef = true;
    3699             :         MoveDef = Pos;
    3700        1362 :       } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
    3701          60 :         if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
    3702             :           OrderBeforeUse = true;
    3703             :           MoveUse = Pos;
    3704             :         } else {
    3705             :           OrderAfterDef = true;
    3706             :           MoveDef = Pos;
    3707             :         }
    3708        2266 :       } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
    3709           7 :         OrderBeforeUse = true;
    3710           7 :         MoveUse = Pos;
    3711           7 :         if (MoveUse != 0) {
    3712           7 :           OrderAfterDef = true;
    3713           7 :           MoveDef = Pos - 1;
    3714             :         }
    3715        2252 :       } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
    3716             :         // Add the instruction before the scheduled instruction.
    3717             :         OrderBeforeUse = true;
    3718             :         MoveUse = Pos;
    3719        1739 :       } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
    3720         442 :                  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
    3721             :         OrderBeforeDef = true;
    3722             :         MoveUse = Pos;
    3723             :       }
    3724             :     }
    3725             :     // Check for order dependences between instructions. Make sure the source
    3726             :     // is ordered before the destination.
    3727        2389 :     for (auto &S : SU->Succs)
    3728         541 :       if (S.getKind() == SDep::Order) {
    3729          69 :         if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
    3730             :           OrderBeforeUse = true;
    3731             :           MoveUse = Pos;
    3732             :         }
    3733         944 :       } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
    3734           0 :         if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
    3735           0 :           if (S.isAssignedRegDep()) {
    3736           0 :             OrderAfterDef = true;
    3737           0 :             MoveDef = Pos;
    3738             :           }
    3739             :         } else {
    3740             :           OrderBeforeUse = true;
    3741             :           MoveUse = Pos;
    3742             :         }
    3743             :       }
    3744        2756 :     for (auto &P : SU->Preds)
    3745         908 :       if (P.getKind() == SDep::Order) {
    3746          43 :         if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
    3747             :           OrderAfterDef = true;
    3748             :           MoveDef = Pos;
    3749             :         }
    3750        1730 :       } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
    3751           0 :         if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
    3752           0 :           if (P.isAssignedRegDep()) {
    3753           0 :             OrderBeforeUse = true;
    3754           0 :             MoveUse = Pos;
    3755             :           }
    3756             :         } else {
    3757             :           OrderAfterDef = true;
    3758             :           MoveDef = Pos;
    3759             :         }
    3760             :       }
    3761             :   }
    3762             : 
    3763             :   // A circular dependence.
    3764         808 :   if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    3765           0 :     OrderBeforeUse = false;
    3766             : 
    3767             :   // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
    3768             :   // to a loop-carried dependence.
    3769         808 :   if (OrderBeforeDef)
    3770          59 :     OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
    3771             : 
    3772             :   // The uncommon case when the instruction order needs to be updated because
    3773             :   // there is both a use and def.
    3774         808 :   if (OrderBeforeUse && OrderAfterDef) {
    3775          44 :     SUnit *UseSU = Insts.at(MoveUse);
    3776          44 :     SUnit *DefSU = Insts.at(MoveDef);
    3777          44 :     if (MoveUse > MoveDef) {
    3778         176 :       Insts.erase(Insts.begin() + MoveUse);
    3779         176 :       Insts.erase(Insts.begin() + MoveDef);
    3780             :     } else {
    3781           0 :       Insts.erase(Insts.begin() + MoveDef);
    3782           0 :       Insts.erase(Insts.begin() + MoveUse);
    3783             :     }
    3784          44 :     if (orderDependence(SSD, UseSU, Insts)) {
    3785           0 :       Insts.push_front(SU);
    3786           0 :       orderDependence(SSD, DefSU, Insts);
    3787           0 :       return true;
    3788             :     }
    3789          44 :     Insts.pop_back();
    3790          44 :     Insts.push_back(SU);
    3791          44 :     Insts.push_back(UseSU);
    3792          44 :     orderDependence(SSD, DefSU, Insts);
    3793          44 :     return false;
    3794             :   }
    3795             :   // Put the new instruction first if there is a use in the list. Otherwise,
    3796             :   // put it at the end of the list.
    3797         764 :   if (OrderBeforeUse)
    3798          95 :     Insts.push_front(SU);
    3799             :   else
    3800         669 :     Insts.push_back(SU);
    3801             :   return OrderBeforeUse;
    3802             : }
    3803             : 
    3804             : /// Return true if the scheduled Phi has a loop carried operand.
    3805        1076 : bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
    3806             :   if (!Phi.isPHI())
    3807             :     return false;
    3808             :   assert(Phi.isPHI() && "Expecing a Phi.");
    3809        1608 :   SUnit *DefSU = SSD->getSUnit(&Phi);
    3810         804 :   unsigned DefCycle = cycleScheduled(DefSU);
    3811         804 :   int DefStage = stageScheduled(DefSU);
    3812             : 
    3813         804 :   unsigned InitVal = 0;
    3814         804 :   unsigned LoopVal = 0;
    3815        1608 :   getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
    3816        1608 :   SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
    3817         798 :   if (!UseSU)
    3818             :     return true;
    3819        1582 :   if (UseSU->getInstr()->isPHI())
    3820             :     return true;
    3821         784 :   unsigned LoopCycle = cycleScheduled(UseSU);
    3822         784 :   int LoopStage = stageScheduled(UseSU);
    3823         784 :   return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
    3824             : }
    3825             : 
    3826             : /// Return true if the instruction is a definition that is loop carried
    3827             : /// and defines the use on the next iteration.
    3828             : ///        v1 = phi(v2, v3)
    3829             : ///  (Def) v3 = op v1
    3830             : ///  (MO)   = v1
    3831             : /// If MO appears before Def, then then v1 and v3 may get assigned to the same
    3832             : /// register.
    3833         442 : bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
    3834             :                                        MachineInstr *Def, MachineOperand &MO) {
    3835         442 :   if (!MO.isReg())
    3836             :     return false;
    3837         442 :   if (Def->isPHI())
    3838             :     return false;
    3839         442 :   MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
    3840         636 :   if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
    3841             :     return false;
    3842         193 :   if (!isLoopCarried(SSD, *Phi))
    3843             :     return false;
    3844         382 :   unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
    3845         640 :   for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
    3846        1018 :     MachineOperand &DMO = Def->getOperand(i);
    3847        1223 :     if (!DMO.isReg() || !DMO.isDef())
    3848         311 :       continue;
    3849         198 :     if (DMO.getReg() == LoopReg)
    3850             :       return true;
    3851             :   }
    3852             :   return false;
    3853             : }
    3854             : 
    3855             : // Check if the generated schedule is valid. This function checks if
    3856             : // an instruction that uses a physical register is scheduled in a
    3857             : // different stage than the definition. The pipeliner does not handle
    3858             : // physical register values that may cross a basic block boundary.
    3859         108 : bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
    3860         936 :   for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
    3861        1440 :     SUnit &SU = SSD->SUnits[i];
    3862         720 :     if (!SU.hasPhysRegDefs)
    3863         720 :       continue;
    3864           0 :     int StageDef = stageScheduled(&SU);
    3865             :     assert(StageDef != -1 && "Instruction should have been scheduled.");
    3866           0 :     for (auto &SI : SU.Succs)
    3867           0 :       if (SI.isAssignedRegDep())
    3868           0 :         if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
    3869           0 :           if (stageScheduled(SI.getSUnit()) != StageDef)
    3870             :             return false;
    3871             :   }
    3872             :   return true;
    3873             : }
    3874             : 
    3875             : /// After the schedule has been formed, call this function to combine
    3876             : /// the instructions from the different stages/cycles.  That is, this
    3877             : /// function creates a schedule that represents a single iteration.
    3878         108 : void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
    3879             :   // Move all instructions to the first stage from later stages.
    3880         628 :   for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    3881         596 :     for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
    3882             :          ++stage) {
    3883             :       std::deque<SUnit *> &cycleInstrs =
    3884         368 :           ScheduledInstrs[cycle + (stage * InitiationInterval)];
    3885         184 :       for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
    3886             :                                                  E = cycleInstrs.rend();
    3887         397 :            I != E; ++I)
    3888         639 :         ScheduledInstrs[cycle].push_front(*I);
    3889             :     }
    3890             :   }
    3891             :   // Iterate over the definitions in each instruction, and compute the
    3892             :   // stage difference for each use.  Keep the maximum value.
    3893        1044 :   for (auto &I : InstrToCycle) {
    3894         720 :     int DefStage = stageScheduled(I.first);
    3895         720 :     MachineInstr *MI = I.first->getInstr();
    3896        3445 :     for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    3897        5450 :       MachineOperand &Op = MI->getOperand(i);
    3898        4681 :       if (!Op.isReg() || !Op.isDef())
    3899        2036 :         continue;
    3900             : 
    3901         689 :       unsigned Reg = Op.getReg();
    3902         689 :       unsigned MaxDiff = 0;
    3903         689 :       bool PhiIsSwapped = false;
    3904         689 :       for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
    3905         689 :                                              EI = MRI.use_end();
    3906        1617 :            UI != EI; ++UI) {
    3907         928 :         MachineOperand &UseOp = *UI;
    3908         928 :         MachineInstr *UseMI = UseOp.getParent();
    3909        1856 :         SUnit *SUnitUse = SSD->getSUnit(UseMI);
    3910         928 :         int UseStage = stageScheduled(SUnitUse);
    3911         928 :         unsigned Diff = 0;
    3912         928 :         if (UseStage != -1 && UseStage >= DefStage)
    3913         884 :           Diff = UseStage - DefStage;
    3914             :         if (MI->isPHI()) {
    3915         339 :           if (isLoopCarried(SSD, *MI))
    3916         336 :             ++Diff;
    3917             :           else
    3918             :             PhiIsSwapped = true;
    3919             :         }
    3920         928 :         MaxDiff = std::max(Diff, MaxDiff);
    3921             :       }
    3922        2067 :       RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
    3923             :     }
    3924             :   }
    3925             : 
    3926             :   // Erase all the elements in the later stages. Only one iteration should
    3927             :   // remain in the scheduled list, and it contains all the instructions.
    3928         374 :   for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
    3929         158 :     ScheduledInstrs.erase(cycle);
    3930             : 
    3931             :   // Change the registers in instruction as specified in the InstrChanges
    3932             :   // map. We need to use the new registers to create the correct order.
    3933         936 :   for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
    3934        1440 :     SUnit *SU = &SSD->SUnits[i];
    3935         720 :     SSD->applyInstrChange(SU->getInstr(), *this, true);
    3936             :   }
    3937             : 
    3938             :   // Reorder the instructions in each cycle to fix and improve the
    3939             :   // generated code.
    3940         422 :   for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
    3941         412 :     std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
    3942         412 :     std::deque<SUnit *> newOrderZC;
    3943             :     // Put the zero-cost, pseudo instructions at the start of the cycle.
    3944        1132 :     for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
    3945        1440 :       SUnit *SU = cycleInstrs[i];
    3946        1440 :       if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
    3947         195 :         orderDependence(SSD, SU, newOrderZC);
    3948             :     }
    3949         412 :     std::deque<SUnit *> newOrderI;
    3950             :     // Then, add the regular instructions back.
    3951        1132 :     for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
    3952        1440 :       SUnit *SU = cycleInstrs[i];
    3953        1440 :       if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
    3954         525 :         orderDependence(SSD, SU, newOrderI);
    3955             :     }
    3956             :     // Replace the old order with the new order.
    3957         206 :     cycleInstrs.swap(newOrderZC);
    3958         824 :     cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
    3959             :   }
    3960             : 
    3961             :   DEBUG(dump(););
    3962      217026 : }
    3963             : 
    3964             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    3965             : /// Print the schedule information to the given output.
    3966             : void SMSchedule::print(raw_ostream &os) const {
    3967             :   // Iterate over each cycle.
    3968             :   for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    3969             :     // Iterate over each instruction in the cycle.
    3970             :     const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
    3971             :     for (SUnit *CI : cycleInstrs->second) {
    3972             :       os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
    3973             :       os << "(" << CI->NodeNum << ") ";
    3974             :       CI->getInstr()->print(os);
    3975             :       os << "\n";
    3976             :     }
    3977             :   }
    3978             : }
    3979             : 
    3980             : /// Utility function used for debugging to print the schedule.
    3981             : LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
    3982             : #endif

Generated by: LCOV version 1.13