LLVM  7.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlap loop iterations and exploits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
21 // Conference on Parallel Architectures and Compilation Techiniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Chambpain, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and the check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/MapVector.h"
65 #include "llvm/ADT/PriorityQueue.h"
66 #include "llvm/ADT/SetVector.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/Statistic.h"
96 #include "llvm/Config/llvm-config.h"
97 #include "llvm/IR/Attributes.h"
98 #include "llvm/IR/DebugLoc.h"
99 #include "llvm/IR/Function.h"
100 #include "llvm/MC/LaneBitmask.h"
101 #include "llvm/MC/MCInstrDesc.h"
103 #include "llvm/MC/MCRegisterInfo.h"
104 #include "llvm/Pass.h"
106 #include "llvm/Support/Compiler.h"
107 #include "llvm/Support/Debug.h"
108 #include "llvm/Support/MathExtras.h"
110 #include <algorithm>
111 #include <cassert>
112 #include <climits>
113 #include <cstdint>
114 #include <deque>
115 #include <functional>
116 #include <iterator>
117 #include <map>
118 #include <memory>
119 #include <tuple>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
124 
125 #define DEBUG_TYPE "pipeliner"
126 
127 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
128 STATISTIC(NumPipelined, "Number of loops software pipelined");
129 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
130 
131 /// A command line option to turn software pipelining on or off.
132 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
134  cl::desc("Enable Software Pipelining"));
135 
136 /// A command line option to enable SWP at -Os.
137 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
138  cl::desc("Enable SWP at Os."), cl::Hidden,
139  cl::init(false));
140 
141 /// A command line argument to limit minimum initial interval for pipelining.
142 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
143  cl::desc("Size limit for the MII."),
144  cl::Hidden, cl::init(27));
145 
146 /// A command line argument to limit the number of stages in the pipeline.
147 static cl::opt<int>
148  SwpMaxStages("pipeliner-max-stages",
149  cl::desc("Maximum stages allowed in the generated scheduled."),
150  cl::Hidden, cl::init(3));
151 
152 /// A command line option to disable the pruning of chain dependences due to
153 /// an unrelated Phi.
154 static cl::opt<bool>
155  SwpPruneDeps("pipeliner-prune-deps",
156  cl::desc("Prune dependences between unrelated Phi nodes."),
157  cl::Hidden, cl::init(true));
158 
159 /// A command line option to disable the pruning of loop carried order
160 /// dependences.
161 static cl::opt<bool>
162  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
163  cl::desc("Prune loop carried order dependences."),
164  cl::Hidden, cl::init(true));
165 
166 #ifndef NDEBUG
167 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
168 #endif
169 
170 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
171  cl::ReallyHidden, cl::init(false),
172  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
173 
174 namespace {
175 
176 class NodeSet;
177 class SMSchedule;
178 
179 /// The main class in the implementation of the target independent
180 /// software pipeliner pass.
181 class MachinePipeliner : public MachineFunctionPass {
182 public:
183  MachineFunction *MF = nullptr;
184  const MachineLoopInfo *MLI = nullptr;
185  const MachineDominatorTree *MDT = nullptr;
186  const InstrItineraryData *InstrItins;
187  const TargetInstrInfo *TII = nullptr;
188  RegisterClassInfo RegClassInfo;
189 
190 #ifndef NDEBUG
191  static int NumTries;
192 #endif
193 
194  /// Cache the target analysis information about the loop.
195  struct LoopInfo {
196  MachineBasicBlock *TBB = nullptr;
197  MachineBasicBlock *FBB = nullptr;
199  MachineInstr *LoopInductionVar = nullptr;
200  MachineInstr *LoopCompare = nullptr;
201  };
202  LoopInfo LI;
203 
204  static char ID;
205 
206  MachinePipeliner() : MachineFunctionPass(ID) {
208  }
209 
210  bool runOnMachineFunction(MachineFunction &MF) override;
211 
212  void getAnalysisUsage(AnalysisUsage &AU) const override {
219  }
220 
221 private:
222  void preprocessPhiNodes(MachineBasicBlock &B);
223  bool canPipelineLoop(MachineLoop &L);
224  bool scheduleLoop(MachineLoop &L);
225  bool swingModuloScheduler(MachineLoop &L);
226 };
227 
228 /// This class builds the dependence graph for the instructions in a loop,
229 /// and attempts to schedule the instructions using the SMS algorithm.
230 class SwingSchedulerDAG : public ScheduleDAGInstrs {
231  MachinePipeliner &Pass;
232  /// The minimum initiation interval between iterations for this schedule.
233  unsigned MII = 0;
234  /// Set to true if a valid pipelined schedule is found for the loop.
235  bool Scheduled = false;
236  MachineLoop &Loop;
237  LiveIntervals &LIS;
238  const RegisterClassInfo &RegClassInfo;
239 
240  /// A toplogical ordering of the SUnits, which is needed for changing
241  /// dependences and iterating over the SUnits.
243 
244  struct NodeInfo {
245  int ASAP = 0;
246  int ALAP = 0;
247  int ZeroLatencyDepth = 0;
248  int ZeroLatencyHeight = 0;
249 
250  NodeInfo() = default;
251  };
252  /// Computed properties for each node in the graph.
253  std::vector<NodeInfo> ScheduleInfo;
254 
255  enum OrderKind { BottomUp = 0, TopDown = 1 };
256  /// Computed node ordering for scheduling.
258 
259  using NodeSetType = SmallVector<NodeSet, 8>;
260  using ValueMapTy = DenseMap<unsigned, unsigned>;
261  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
262  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
263 
264  /// Instructions to change when emitting the final schedule.
266 
267  /// We may create a new instruction, so remember it because it
268  /// must be deleted when the pass is finished.
270 
271  /// Ordered list of DAG postprocessing steps.
272  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
273 
274  /// Helper class to implement Johnson's circuit finding algorithm.
275  class Circuits {
276  std::vector<SUnit> &SUnits;
277  SetVector<SUnit *> Stack;
278  BitVector Blocked;
281  unsigned NumPaths;
282  static unsigned MaxPaths;
283 
284  public:
285  Circuits(std::vector<SUnit> &SUs)
286  : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
287 
288  /// Reset the data structures used in the circuit algorithm.
289  void reset() {
290  Stack.clear();
291  Blocked.reset();
292  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
293  NumPaths = 0;
294  }
295 
296  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
297  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
298  void unblock(int U);
299  };
300 
301 public:
302  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
303  const RegisterClassInfo &rci)
304  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
305  RegClassInfo(rci), Topo(SUnits, &ExitSU) {
306  P.MF->getSubtarget().getSMSMutations(Mutations);
307  }
308 
309  void schedule() override;
310  void finishBlock() override;
311 
312  /// Return true if the loop kernel has been scheduled.
313  bool hasNewSchedule() { return Scheduled; }
314 
315  /// Return the earliest time an instruction may be scheduled.
316  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
317 
318  /// Return the latest time an instruction my be scheduled.
319  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
320 
321  /// The mobility function, which the number of slots in which
322  /// an instruction may be scheduled.
323  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
324 
325  /// The depth, in the dependence graph, for a node.
326  unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
327 
328  /// The maximum unweighted length of a path from an arbitrary node to the
329  /// given node in which each edge has latency 0
330  int getZeroLatencyDepth(SUnit *Node) {
331  return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
332  }
333 
334  /// The height, in the dependence graph, for a node.
335  unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
336 
337  /// The maximum unweighted length of a path from the given node to an
338  /// arbitrary node in which each edge has latency 0
339  int getZeroLatencyHeight(SUnit *Node) {
340  return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
341  }
342 
343  /// Return true if the dependence is a back-edge in the data dependence graph.
344  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
345  /// using an anti dependence from a Phi to an instruction.
346  bool isBackedge(SUnit *Source, const SDep &Dep) {
347  if (Dep.getKind() != SDep::Anti)
348  return false;
349  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
350  }
351 
352  bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
353 
354  /// The distance function, which indicates that operation V of iteration I
355  /// depends on operations U of iteration I-distance.
356  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
357  // Instructions that feed a Phi have a distance of 1. Computing larger
358  // values for arrays requires data dependence information.
359  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
360  return 1;
361  return 0;
362  }
363 
364  /// Set the Minimum Initiation Interval for this schedule attempt.
365  void setMII(unsigned mii) { MII = mii; }
366 
367  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
368 
369  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
370 
371  /// Return the new base register that was stored away for the changed
372  /// instruction.
373  unsigned getInstrBaseReg(SUnit *SU) {
375  InstrChanges.find(SU);
376  if (It != InstrChanges.end())
377  return It->second.first;
378  return 0;
379  }
380 
381  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
382  Mutations.push_back(std::move(Mutation));
383  }
384 
385 private:
386  void addLoopCarriedDependences(AliasAnalysis *AA);
387  void updatePhiDependences();
388  void changeDependences();
389  unsigned calculateResMII();
390  unsigned calculateRecMII(NodeSetType &RecNodeSets);
391  void findCircuits(NodeSetType &NodeSets);
392  void fuseRecs(NodeSetType &NodeSets);
393  void removeDuplicateNodes(NodeSetType &NodeSets);
394  void computeNodeFunctions(NodeSetType &NodeSets);
395  void registerPressureFilter(NodeSetType &NodeSets);
396  void colocateNodeSets(NodeSetType &NodeSets);
397  void checkNodeSets(NodeSetType &NodeSets);
398  void groupRemainingNodes(NodeSetType &NodeSets);
399  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
400  SetVector<SUnit *> &NodesAdded);
401  void computeNodeOrder(NodeSetType &NodeSets);
402  void checkValidNodeOrder(const NodeSetType &Circuits) const;
403  bool schedulePipeline(SMSchedule &Schedule);
404  void generatePipelinedLoop(SMSchedule &Schedule);
405  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
406  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
407  MBBVectorTy &PrologBBs);
408  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
409  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
410  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
411  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
412  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
413  SMSchedule &Schedule, ValueMapTy *VRMap,
414  InstrMapTy &InstrMap, unsigned LastStageNum,
415  unsigned CurStageNum, bool IsLast);
416  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
417  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
418  SMSchedule &Schedule, ValueMapTy *VRMap,
419  InstrMapTy &InstrMap, unsigned LastStageNum,
420  unsigned CurStageNum, bool IsLast);
421  void removeDeadInstructions(MachineBasicBlock *KernelBB,
422  MBBVectorTy &EpilogBBs);
423  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
424  SMSchedule &Schedule);
425  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
426  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
427  ValueMapTy *VRMap);
428  bool computeDelta(MachineInstr &MI, unsigned &Delta);
429  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
430  unsigned Num);
431  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
432  unsigned InstStageNum);
433  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
434  unsigned InstStageNum,
435  SMSchedule &Schedule);
436  void updateInstruction(MachineInstr *NewMI, bool LastDef,
437  unsigned CurStageNum, unsigned InstStageNum,
438  SMSchedule &Schedule, ValueMapTy *VRMap);
439  MachineInstr *findDefInLoop(unsigned Reg);
440  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
441  unsigned LoopStage, ValueMapTy *VRMap,
442  MachineBasicBlock *BB);
443  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
444  SMSchedule &Schedule, ValueMapTy *VRMap,
445  InstrMapTy &InstrMap);
446  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
447  InstrMapTy &InstrMap, unsigned CurStageNum,
448  unsigned PhiNum, MachineInstr *Phi,
449  unsigned OldReg, unsigned NewReg,
450  unsigned PrevReg = 0);
451  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
452  unsigned &OffsetPos, unsigned &NewBase,
453  int64_t &NewOffset);
454  void postprocessDAG();
455 };
456 
457 /// A NodeSet contains a set of SUnit DAG nodes with additional information
458 /// that assigns a priority to the set.
459 class NodeSet {
460  SetVector<SUnit *> Nodes;
461  bool HasRecurrence = false;
462  unsigned RecMII = 0;
463  int MaxMOV = 0;
464  unsigned MaxDepth = 0;
465  unsigned Colocate = 0;
466  SUnit *ExceedPressure = nullptr;
467  unsigned Latency = 0;
468 
469 public:
470  using iterator = SetVector<SUnit *>::const_iterator;
471 
472  NodeSet() = default;
473  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
474  Latency = 0;
475  for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
476  for (const SDep &Succ : Nodes[i]->Succs)
477  if (Nodes.count(Succ.getSUnit()))
478  Latency += Succ.getLatency();
479  }
480 
481  bool insert(SUnit *SU) { return Nodes.insert(SU); }
482 
483  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
484 
485  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
486  return Nodes.remove_if(P);
487  }
488 
489  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
490 
491  bool hasRecurrence() { return HasRecurrence; };
492 
493  unsigned size() const { return Nodes.size(); }
494 
495  bool empty() const { return Nodes.empty(); }
496 
497  SUnit *getNode(unsigned i) const { return Nodes[i]; };
498 
499  void setRecMII(unsigned mii) { RecMII = mii; };
500 
501  void setColocate(unsigned c) { Colocate = c; };
502 
503  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
504 
505  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
506 
507  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
508 
509  int getRecMII() { return RecMII; }
510 
511  /// Summarize node functions for the entire node set.
512  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
513  for (SUnit *SU : *this) {
514  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
515  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
516  }
517  }
518 
519  unsigned getLatency() { return Latency; }
520 
521  unsigned getMaxDepth() { return MaxDepth; }
522 
523  void clear() {
524  Nodes.clear();
525  RecMII = 0;
526  HasRecurrence = false;
527  MaxMOV = 0;
528  MaxDepth = 0;
529  Colocate = 0;
530  ExceedPressure = nullptr;
531  }
532 
533  operator SetVector<SUnit *> &() { return Nodes; }
534 
535  /// Sort the node sets by importance. First, rank them by recurrence MII,
536  /// then by mobility (least mobile done first), and finally by depth.
537  /// Each node set may contain a colocate value which is used as the first
538  /// tie breaker, if it's set.
539  bool operator>(const NodeSet &RHS) const {
540  if (RecMII == RHS.RecMII) {
541  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
542  return Colocate < RHS.Colocate;
543  if (MaxMOV == RHS.MaxMOV)
544  return MaxDepth > RHS.MaxDepth;
545  return MaxMOV < RHS.MaxMOV;
546  }
547  return RecMII > RHS.RecMII;
548  }
549 
550  bool operator==(const NodeSet &RHS) const {
551  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
552  MaxDepth == RHS.MaxDepth;
553  }
554 
555  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
556 
557  iterator begin() { return Nodes.begin(); }
558  iterator end() { return Nodes.end(); }
559 
560  void print(raw_ostream &os) const {
561  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
562  << " depth " << MaxDepth << " col " << Colocate << "\n";
563  for (const auto &I : Nodes)
564  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
565  os << "\n";
566  }
567 
568 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
569  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
570 #endif
571 };
572 
573 /// This class represents the scheduled code. The main data structure is a
574 /// map from scheduled cycle to instructions. During scheduling, the
575 /// data structure explicitly represents all stages/iterations. When
576 /// the algorithm finshes, the schedule is collapsed into a single stage,
577 /// which represents instructions from different loop iterations.
578 ///
579 /// The SMS algorithm allows negative values for cycles, so the first cycle
580 /// in the schedule is the smallest cycle value.
581 class SMSchedule {
582 private:
583  /// Map from execution cycle to instructions.
584  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
585 
586  /// Map from instruction to execution cycle.
587  std::map<SUnit *, int> InstrToCycle;
588 
589  /// Map for each register and the max difference between its uses and def.
590  /// The first element in the pair is the max difference in stages. The
591  /// second is true if the register defines a Phi value and loop value is
592  /// scheduled before the Phi.
593  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
594 
595  /// Keep track of the first cycle value in the schedule. It starts
596  /// as zero, but the algorithm allows negative values.
597  int FirstCycle = 0;
598 
599  /// Keep track of the last cycle value in the schedule.
600  int LastCycle = 0;
601 
602  /// The initiation interval (II) for the schedule.
603  int InitiationInterval = 0;
604 
605  /// Target machine information.
606  const TargetSubtargetInfo &ST;
607 
608  /// Virtual register information.
610 
611  std::unique_ptr<DFAPacketizer> Resources;
612 
613 public:
614  SMSchedule(MachineFunction *mf)
615  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
616  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
617 
618  void reset() {
619  ScheduledInstrs.clear();
620  InstrToCycle.clear();
621  RegToStageDiff.clear();
622  FirstCycle = 0;
623  LastCycle = 0;
624  InitiationInterval = 0;
625  }
626 
627  /// Set the initiation interval for this schedule.
628  void setInitiationInterval(int ii) { InitiationInterval = ii; }
629 
630  /// Return the first cycle in the completed schedule. This
631  /// can be a negative value.
632  int getFirstCycle() const { return FirstCycle; }
633 
634  /// Return the last cycle in the finalized schedule.
635  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
636 
637  /// Return the cycle of the earliest scheduled instruction in the dependence
638  /// chain.
639  int earliestCycleInChain(const SDep &Dep);
640 
641  /// Return the cycle of the latest scheduled instruction in the dependence
642  /// chain.
643  int latestCycleInChain(const SDep &Dep);
644 
645  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
646  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
647  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
648 
649  /// Iterators for the cycle to instruction map.
650  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
651  using const_sched_iterator =
652  DenseMap<int, std::deque<SUnit *>>::const_iterator;
653 
654  /// Return true if the instruction is scheduled at the specified stage.
655  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
656  return (stageScheduled(SU) == (int)StageNum);
657  }
658 
659  /// Return the stage for a scheduled instruction. Return -1 if
660  /// the instruction has not been scheduled.
661  int stageScheduled(SUnit *SU) const {
662  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
663  if (it == InstrToCycle.end())
664  return -1;
665  return (it->second - FirstCycle) / InitiationInterval;
666  }
667 
668  /// Return the cycle for a scheduled instruction. This function normalizes
669  /// the first cycle to be 0.
670  unsigned cycleScheduled(SUnit *SU) const {
671  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
672  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
673  return (it->second - FirstCycle) % InitiationInterval;
674  }
675 
676  /// Return the maximum stage count needed for this schedule.
677  unsigned getMaxStageCount() {
678  return (LastCycle - FirstCycle) / InitiationInterval;
679  }
680 
681  /// Return the max. number of stages/iterations that can occur between a
682  /// register definition and its uses.
683  unsigned getStagesForReg(int Reg, unsigned CurStage) {
684  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
685  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
686  return 1;
687  return Stages.first;
688  }
689 
690  /// The number of stages for a Phi is a little different than other
691  /// instructions. The minimum value computed in RegToStageDiff is 1
692  /// because we assume the Phi is needed for at least 1 iteration.
693  /// This is not the case if the loop value is scheduled prior to the
694  /// Phi in the same stage. This function returns the number of stages
695  /// or iterations needed between the Phi definition and any uses.
696  unsigned getStagesForPhi(int Reg) {
697  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
698  if (Stages.second)
699  return Stages.first;
700  return Stages.first - 1;
701  }
702 
703  /// Return the instructions that are scheduled at the specified cycle.
704  std::deque<SUnit *> &getInstructions(int cycle) {
705  return ScheduledInstrs[cycle];
706  }
707 
708  bool isValidSchedule(SwingSchedulerDAG *SSD);
709  void finalizeSchedule(SwingSchedulerDAG *SSD);
710  void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
711  std::deque<SUnit *> &Insts);
712  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
713  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
714  MachineOperand &MO);
715  void print(raw_ostream &os) const;
716  void dump() const;
717 };
718 
719 } // end anonymous namespace
720 
721 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
722 char MachinePipeliner::ID = 0;
723 #ifndef NDEBUG
724 int MachinePipeliner::NumTries = 0;
725 #endif
727 
728 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
729  "Modulo Software Pipelining", false, false)
734 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
735  "Modulo Software Pipelining", false, false)
736 
737 /// The "main" function for implementing Swing Modulo Scheduling.
738 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
739  if (skipFunction(mf.getFunction()))
740  return false;
741 
742  if (!EnableSWP)
743  return false;
744 
745  if (mf.getFunction().getAttributes().hasAttribute(
746  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
747  !EnableSWPOptSize.getPosition())
748  return false;
749 
750  MF = &mf;
751  MLI = &getAnalysis<MachineLoopInfo>();
752  MDT = &getAnalysis<MachineDominatorTree>();
753  TII = MF->getSubtarget().getInstrInfo();
754  RegClassInfo.runOnMachineFunction(*MF);
755 
756  for (auto &L : *MLI)
757  scheduleLoop(*L);
758 
759  return false;
760 }
761 
762 /// Attempt to perform the SMS algorithm on the specified loop. This function is
763 /// the main entry point for the algorithm. The function identifies candidate
764 /// loops, calculates the minimum initiation interval, and attempts to schedule
765 /// the loop.
766 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
767  bool Changed = false;
768  for (auto &InnerLoop : L)
769  Changed |= scheduleLoop(*InnerLoop);
770 
771 #ifndef NDEBUG
772  // Stop trying after reaching the limit (if any).
773  int Limit = SwpLoopLimit;
774  if (Limit >= 0) {
775  if (NumTries >= SwpLoopLimit)
776  return Changed;
777  NumTries++;
778  }
779 #endif
780 
781  if (!canPipelineLoop(L))
782  return Changed;
783 
784  ++NumTrytoPipeline;
785 
786  Changed = swingModuloScheduler(L);
787 
788  return Changed;
789 }
790 
791 /// Return true if the loop can be software pipelined. The algorithm is
792 /// restricted to loops with a single basic block. Make sure that the
793 /// branch in the loop can be analyzed.
794 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
795  if (L.getNumBlocks() != 1)
796  return false;
797 
798  // Check if the branch can't be understood because we can't do pipelining
799  // if that's the case.
800  LI.TBB = nullptr;
801  LI.FBB = nullptr;
802  LI.BrCond.clear();
803  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
804  return false;
805 
806  LI.LoopInductionVar = nullptr;
807  LI.LoopCompare = nullptr;
808  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
809  return false;
810 
811  if (!L.getLoopPreheader())
812  return false;
813 
814  // Remove any subregisters from inputs to phi nodes.
815  preprocessPhiNodes(*L.getHeader());
816  return true;
817 }
818 
819 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
820  MachineRegisterInfo &MRI = MF->getRegInfo();
821  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
822 
823  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
824  MachineOperand &DefOp = PI.getOperand(0);
825  assert(DefOp.getSubReg() == 0);
826  auto *RC = MRI.getRegClass(DefOp.getReg());
827 
828  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
829  MachineOperand &RegOp = PI.getOperand(i);
830  if (RegOp.getSubReg() == 0)
831  continue;
832 
833  // If the operand uses a subregister, replace it with a new register
834  // without subregisters, and generate a copy to the new register.
835  unsigned NewReg = MRI.createVirtualRegister(RC);
836  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
838  const DebugLoc &DL = PredB.findDebugLoc(At);
839  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
840  .addReg(RegOp.getReg(), getRegState(RegOp),
841  RegOp.getSubReg());
842  Slots.insertMachineInstrInMaps(*Copy);
843  RegOp.setReg(NewReg);
844  RegOp.setSubReg(0);
845  }
846  }
847 }
848 
849 /// The SMS algorithm consists of the following main steps:
850 /// 1. Computation and analysis of the dependence graph.
851 /// 2. Ordering of the nodes (instructions).
852 /// 3. Attempt to Schedule the loop.
853 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
854  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
855 
856  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
857 
858  MachineBasicBlock *MBB = L.getHeader();
859  // The kernel should not include any terminator instructions. These
860  // will be added back later.
861  SMS.startBlock(MBB);
862 
863  // Compute the number of 'real' instructions in the basic block by
864  // ignoring terminators.
865  unsigned size = MBB->size();
867  E = MBB->instr_end();
868  I != E; ++I, --size)
869  ;
870 
871  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
872  SMS.schedule();
873  SMS.exitRegion();
874 
875  SMS.finishBlock();
876  return SMS.hasNewSchedule();
877 }
878 
879 /// We override the schedule function in ScheduleDAGInstrs to implement the
880 /// scheduling part of the Swing Modulo Scheduling algorithm.
881 void SwingSchedulerDAG::schedule() {
882  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
883  buildSchedGraph(AA);
884  addLoopCarriedDependences(AA);
885  updatePhiDependences();
887  postprocessDAG();
888  changeDependences();
889  LLVM_DEBUG({
890  for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
891  SUnits[su].dumpAll(this);
892  });
893 
894  NodeSetType NodeSets;
895  findCircuits(NodeSets);
896  NodeSetType Circuits = NodeSets;
897 
898  // Calculate the MII.
899  unsigned ResMII = calculateResMII();
900  unsigned RecMII = calculateRecMII(NodeSets);
901 
902  fuseRecs(NodeSets);
903 
904  // This flag is used for testing and can cause correctness problems.
905  if (SwpIgnoreRecMII)
906  RecMII = 0;
907 
908  MII = std::max(ResMII, RecMII);
909  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
910  << ", res=" << ResMII << ")\n");
911 
912  // Can't schedule a loop without a valid MII.
913  if (MII == 0)
914  return;
915 
916  // Don't pipeline large loops.
917  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
918  return;
919 
920  computeNodeFunctions(NodeSets);
921 
922  registerPressureFilter(NodeSets);
923 
924  colocateNodeSets(NodeSets);
925 
926  checkNodeSets(NodeSets);
927 
928  LLVM_DEBUG({
929  for (auto &I : NodeSets) {
930  dbgs() << " Rec NodeSet ";
931  I.dump();
932  }
933  });
934 
935  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
936 
937  groupRemainingNodes(NodeSets);
938 
939  removeDuplicateNodes(NodeSets);
940 
941  LLVM_DEBUG({
942  for (auto &I : NodeSets) {
943  dbgs() << " NodeSet ";
944  I.dump();
945  }
946  });
947 
948  computeNodeOrder(NodeSets);
949 
950  // check for node order issues
951  checkValidNodeOrder(Circuits);
952 
953  SMSchedule Schedule(Pass.MF);
954  Scheduled = schedulePipeline(Schedule);
955 
956  if (!Scheduled)
957  return;
958 
959  unsigned numStages = Schedule.getMaxStageCount();
960  // No need to generate pipeline if there are no overlapped iterations.
961  if (numStages == 0)
962  return;
963 
964  // Check that the maximum stage count is less than user-defined limit.
965  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
966  return;
967 
968  generatePipelinedLoop(Schedule);
969  ++NumPipelined;
970 }
971 
972 /// Clean up after the software pipeliner runs.
973 void SwingSchedulerDAG::finishBlock() {
974  for (MachineInstr *I : NewMIs)
975  MF.DeleteMachineInstr(I);
976  NewMIs.clear();
977 
978  // Call the superclass.
980 }
981 
982 /// Return the register values for the operands of a Phi instruction.
983 /// This function assume the instruction is a Phi.
984 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
985  unsigned &InitVal, unsigned &LoopVal) {
986  assert(Phi.isPHI() && "Expecting a Phi.");
987 
988  InitVal = 0;
989  LoopVal = 0;
990  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
991  if (Phi.getOperand(i + 1).getMBB() != Loop)
992  InitVal = Phi.getOperand(i).getReg();
993  else
994  LoopVal = Phi.getOperand(i).getReg();
995 
996  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
997 }
998 
999 /// Return the Phi register value that comes from the incoming block.
1000 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1001  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1002  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
1003  return Phi.getOperand(i).getReg();
1004  return 0;
1005 }
1006 
1007 /// Return the Phi register value that comes the loop block.
1008 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1009  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1010  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
1011  return Phi.getOperand(i).getReg();
1012  return 0;
1013 }
1014 
1015 /// Return true if SUb can be reached from SUa following the chain edges.
1016 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
1017  SmallPtrSet<SUnit *, 8> Visited;
1018  SmallVector<SUnit *, 8> Worklist;
1019  Worklist.push_back(SUa);
1020  while (!Worklist.empty()) {
1021  const SUnit *SU = Worklist.pop_back_val();
1022  for (auto &SI : SU->Succs) {
1023  SUnit *SuccSU = SI.getSUnit();
1024  if (SI.getKind() == SDep::Order) {
1025  if (Visited.count(SuccSU))
1026  continue;
1027  if (SuccSU == SUb)
1028  return true;
1029  Worklist.push_back(SuccSU);
1030  Visited.insert(SuccSU);
1031  }
1032  }
1033  }
1034  return false;
1035 }
1036 
1037 /// Return true if the instruction causes a chain between memory
1038 /// references before and after it.
1040  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1041  (MI.hasOrderedMemoryRef() &&
1042  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1043 }
1044 
1045 /// Return the underlying objects for the memory references of an instruction.
1046 /// This function calls the code in ValueTracking, but first checks that the
1047 /// instruction has a memory operand.
1050  const DataLayout &DL) {
1051  if (!MI->hasOneMemOperand())
1052  return;
1053  MachineMemOperand *MM = *MI->memoperands_begin();
1054  if (!MM->getValue())
1055  return;
1056  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1057  for (Value *V : Objs) {
1058  if (!isIdentifiedObject(V)) {
1059  Objs.clear();
1060  return;
1061  }
1062  Objs.push_back(V);
1063  }
1064 }
1065 
1066 /// Add a chain edge between a load and store if the store can be an
1067 /// alias of the load on a subsequent iteration, i.e., a loop carried
1068 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1069 /// but that code doesn't create loop carried dependences.
1070 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1072  Value *UnknownValue =
1074  for (auto &SU : SUnits) {
1075  MachineInstr &MI = *SU.getInstr();
1076  if (isDependenceBarrier(MI, AA))
1077  PendingLoads.clear();
1078  else if (MI.mayLoad()) {
1080  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1081  if (Objs.empty())
1082  Objs.push_back(UnknownValue);
1083  for (auto V : Objs) {
1084  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1085  SUs.push_back(&SU);
1086  }
1087  } else if (MI.mayStore()) {
1089  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1090  if (Objs.empty())
1091  Objs.push_back(UnknownValue);
1092  for (auto V : Objs) {
1094  PendingLoads.find(V);
1095  if (I == PendingLoads.end())
1096  continue;
1097  for (auto Load : I->second) {
1098  if (isSuccOrder(Load, &SU))
1099  continue;
1100  MachineInstr &LdMI = *Load->getInstr();
1101  // First, perform the cheaper check that compares the base register.
1102  // If they are the same and the load offset is less than the store
1103  // offset, then mark the dependence as loop carried potentially.
1104  unsigned BaseReg1, BaseReg2;
1105  int64_t Offset1, Offset2;
1106  if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
1107  TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
1108  if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
1109  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1110  "What happened to the chain edge?");
1111  SDep Dep(Load, SDep::Barrier);
1112  Dep.setLatency(1);
1113  SU.addPred(Dep);
1114  continue;
1115  }
1116  }
1117  // Second, the more expensive check that uses alias analysis on the
1118  // base registers. If they alias, and the load offset is less than
1119  // the store offset, the mark the dependence as loop carried.
1120  if (!AA) {
1121  SDep Dep(Load, SDep::Barrier);
1122  Dep.setLatency(1);
1123  SU.addPred(Dep);
1124  continue;
1125  }
1126  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1127  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1128  if (!MMO1->getValue() || !MMO2->getValue()) {
1129  SDep Dep(Load, SDep::Barrier);
1130  Dep.setLatency(1);
1131  SU.addPred(Dep);
1132  continue;
1133  }
1134  if (MMO1->getValue() == MMO2->getValue() &&
1135  MMO1->getOffset() <= MMO2->getOffset()) {
1136  SDep Dep(Load, SDep::Barrier);
1137  Dep.setLatency(1);
1138  SU.addPred(Dep);
1139  continue;
1140  }
1141  AliasResult AAResult = AA->alias(
1143  MMO1->getAAInfo()),
1145  MMO2->getAAInfo()));
1146 
1147  if (AAResult != NoAlias) {
1148  SDep Dep(Load, SDep::Barrier);
1149  Dep.setLatency(1);
1150  SU.addPred(Dep);
1151  }
1152  }
1153  }
1154  }
1155  }
1156 }
1157 
1158 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1159 /// processes dependences for PHIs. This function adds true dependences
1160 /// from a PHI to a use, and a loop carried dependence from the use to the
1161 /// PHI. The loop carried dependence is represented as an anti dependence
1162 /// edge. This function also removes chain dependences between unrelated
1163 /// PHIs.
1164 void SwingSchedulerDAG::updatePhiDependences() {
1165  SmallVector<SDep, 4> RemoveDeps;
1167 
1168  // Iterate over each DAG node.
1169  for (SUnit &I : SUnits) {
1170  RemoveDeps.clear();
1171  // Set to true if the instruction has an operand defined by a Phi.
1172  unsigned HasPhiUse = 0;
1173  unsigned HasPhiDef = 0;
1174  MachineInstr *MI = I.getInstr();
1175  // Iterate over each operand, and we process the definitions.
1176  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1177  MOE = MI->operands_end();
1178  MOI != MOE; ++MOI) {
1179  if (!MOI->isReg())
1180  continue;
1181  unsigned Reg = MOI->getReg();
1182  if (MOI->isDef()) {
1183  // If the register is used by a Phi, then create an anti dependence.
1185  UI = MRI.use_instr_begin(Reg),
1186  UE = MRI.use_instr_end();
1187  UI != UE; ++UI) {
1188  MachineInstr *UseMI = &*UI;
1189  SUnit *SU = getSUnit(UseMI);
1190  if (SU != nullptr && UseMI->isPHI()) {
1191  if (!MI->isPHI()) {
1192  SDep Dep(SU, SDep::Anti, Reg);
1193  Dep.setLatency(1);
1194  I.addPred(Dep);
1195  } else {
1196  HasPhiDef = Reg;
1197  // Add a chain edge to a dependent Phi that isn't an existing
1198  // predecessor.
1199  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1200  I.addPred(SDep(SU, SDep::Barrier));
1201  }
1202  }
1203  }
1204  } else if (MOI->isUse()) {
1205  // If the register is defined by a Phi, then create a true dependence.
1206  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1207  if (DefMI == nullptr)
1208  continue;
1209  SUnit *SU = getSUnit(DefMI);
1210  if (SU != nullptr && DefMI->isPHI()) {
1211  if (!MI->isPHI()) {
1212  SDep Dep(SU, SDep::Data, Reg);
1213  Dep.setLatency(0);
1214  ST.adjustSchedDependency(SU, &I, Dep);
1215  I.addPred(Dep);
1216  } else {
1217  HasPhiUse = Reg;
1218  // Add a chain edge to a dependent Phi that isn't an existing
1219  // predecessor.
1220  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1221  I.addPred(SDep(SU, SDep::Barrier));
1222  }
1223  }
1224  }
1225  }
1226  // Remove order dependences from an unrelated Phi.
1227  if (!SwpPruneDeps)
1228  continue;
1229  for (auto &PI : I.Preds) {
1230  MachineInstr *PMI = PI.getSUnit()->getInstr();
1231  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1232  if (I.getInstr()->isPHI()) {
1233  if (PMI->getOperand(0).getReg() == HasPhiUse)
1234  continue;
1235  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1236  continue;
1237  }
1238  RemoveDeps.push_back(PI);
1239  }
1240  }
1241  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1242  I.removePred(RemoveDeps[i]);
1243  }
1244 }
1245 
1246 /// Iterate over each DAG node and see if we can change any dependences
1247 /// in order to reduce the recurrence MII.
1248 void SwingSchedulerDAG::changeDependences() {
1249  // See if an instruction can use a value from the previous iteration.
1250  // If so, we update the base and offset of the instruction and change
1251  // the dependences.
1252  for (SUnit &I : SUnits) {
1253  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1254  int64_t NewOffset = 0;
1255  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1256  NewOffset))
1257  continue;
1258 
1259  // Get the MI and SUnit for the instruction that defines the original base.
1260  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1261  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1262  if (!DefMI)
1263  continue;
1264  SUnit *DefSU = getSUnit(DefMI);
1265  if (!DefSU)
1266  continue;
1267  // Get the MI and SUnit for the instruction that defins the new base.
1268  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1269  if (!LastMI)
1270  continue;
1271  SUnit *LastSU = getSUnit(LastMI);
1272  if (!LastSU)
1273  continue;
1274 
1275  if (Topo.IsReachable(&I, LastSU))
1276  continue;
1277 
1278  // Remove the dependence. The value now depends on a prior iteration.
1279  SmallVector<SDep, 4> Deps;
1280  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1281  ++P)
1282  if (P->getSUnit() == DefSU)
1283  Deps.push_back(*P);
1284  for (int i = 0, e = Deps.size(); i != e; i++) {
1285  Topo.RemovePred(&I, Deps[i].getSUnit());
1286  I.removePred(Deps[i]);
1287  }
1288  // Remove the chain dependence between the instructions.
1289  Deps.clear();
1290  for (auto &P : LastSU->Preds)
1291  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1292  Deps.push_back(P);
1293  for (int i = 0, e = Deps.size(); i != e; i++) {
1294  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1295  LastSU->removePred(Deps[i]);
1296  }
1297 
1298  // Add a dependence between the new instruction and the instruction
1299  // that defines the new base.
1300  SDep Dep(&I, SDep::Anti, NewBase);
1301  LastSU->addPred(Dep);
1302 
1303  // Remember the base and offset information so that we can update the
1304  // instruction during code generation.
1305  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1306  }
1307 }
1308 
1309 namespace {
1310 
1311 // FuncUnitSorter - Comparison operator used to sort instructions by
1312 // the number of functional unit choices.
1313 struct FuncUnitSorter {
1314  const InstrItineraryData *InstrItins;
1315  DenseMap<unsigned, unsigned> Resources;
1316 
1317  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1318 
1319  // Compute the number of functional unit alternatives needed
1320  // at each stage, and take the minimum value. We prioritize the
1321  // instructions by the least number of choices first.
1322  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1323  unsigned schedClass = Inst->getDesc().getSchedClass();
1324  unsigned min = UINT_MAX;
1325  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1326  *IE = InstrItins->endStage(schedClass);
1327  IS != IE; ++IS) {
1328  unsigned funcUnits = IS->getUnits();
1329  unsigned numAlternatives = countPopulation(funcUnits);
1330  if (numAlternatives < min) {
1331  min = numAlternatives;
1332  F = funcUnits;
1333  }
1334  }
1335  return min;
1336  }
1337 
1338  // Compute the critical resources needed by the instruction. This
1339  // function records the functional units needed by instructions that
1340  // must use only one functional unit. We use this as a tie breaker
1341  // for computing the resource MII. The instrutions that require
1342  // the same, highly used, functional unit have high priority.
1343  void calcCriticalResources(MachineInstr &MI) {
1344  unsigned SchedClass = MI.getDesc().getSchedClass();
1345  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1346  *IE = InstrItins->endStage(SchedClass);
1347  IS != IE; ++IS) {
1348  unsigned FuncUnits = IS->getUnits();
1349  if (countPopulation(FuncUnits) == 1)
1350  Resources[FuncUnits]++;
1351  }
1352  }
1353 
1354  /// Return true if IS1 has less priority than IS2.
1355  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1356  unsigned F1 = 0, F2 = 0;
1357  unsigned MFUs1 = minFuncUnits(IS1, F1);
1358  unsigned MFUs2 = minFuncUnits(IS2, F2);
1359  if (MFUs1 == 1 && MFUs2 == 1)
1360  return Resources.lookup(F1) < Resources.lookup(F2);
1361  return MFUs1 > MFUs2;
1362  }
1363 };
1364 
1365 } // end anonymous namespace
1366 
1367 /// Calculate the resource constrained minimum initiation interval for the
1368 /// specified loop. We use the DFA to model the resources needed for
1369 /// each instruction, and we ignore dependences. A different DFA is created
1370 /// for each cycle that is required. When adding a new instruction, we attempt
1371 /// to add it to each existing DFA, until a legal space is found. If the
1372 /// instruction cannot be reserved in an existing DFA, we create a new one.
1373 unsigned SwingSchedulerDAG::calculateResMII() {
1375  MachineBasicBlock *MBB = Loop.getHeader();
1376  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1377 
1378  // Sort the instructions by the number of available choices for scheduling,
1379  // least to most. Use the number of critical resources as the tie breaker.
1380  FuncUnitSorter FUS =
1381  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1383  E = MBB->getFirstTerminator();
1384  I != E; ++I)
1385  FUS.calcCriticalResources(*I);
1387  FuncUnitOrder(FUS);
1388 
1390  E = MBB->getFirstTerminator();
1391  I != E; ++I)
1392  FuncUnitOrder.push(&*I);
1393 
1394  while (!FuncUnitOrder.empty()) {
1395  MachineInstr *MI = FuncUnitOrder.top();
1396  FuncUnitOrder.pop();
1397  if (TII->isZeroCost(MI->getOpcode()))
1398  continue;
1399  // Attempt to reserve the instruction in an existing DFA. At least one
1400  // DFA is needed for each cycle.
1401  unsigned NumCycles = getSUnit(MI)->Latency;
1402  unsigned ReservedCycles = 0;
1405  for (unsigned C = 0; C < NumCycles; ++C)
1406  while (RI != RE) {
1407  if ((*RI++)->canReserveResources(*MI)) {
1408  ++ReservedCycles;
1409  break;
1410  }
1411  }
1412  // Start reserving resources using existing DFAs.
1413  for (unsigned C = 0; C < ReservedCycles; ++C) {
1414  --RI;
1415  (*RI)->reserveResources(*MI);
1416  }
1417  // Add new DFAs, if needed, to reserve resources.
1418  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1419  DFAPacketizer *NewResource =
1421  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1422  NewResource->reserveResources(*MI);
1423  Resources.push_back(NewResource);
1424  }
1425  }
1426  int Resmii = Resources.size();
1427  // Delete the memory for each of the DFAs that were created earlier.
1428  for (DFAPacketizer *RI : Resources) {
1429  DFAPacketizer *D = RI;
1430  delete D;
1431  }
1432  Resources.clear();
1433  return Resmii;
1434 }
1435 
1436 /// Calculate the recurrence-constrainted minimum initiation interval.
1437 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1438 /// for each circuit. The II needs to satisfy the inequality
1439 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1440 /// II that satisfies the inequality, and the RecMII is the maximum
1441 /// of those values.
1442 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1443  unsigned RecMII = 0;
1444 
1445  for (NodeSet &Nodes : NodeSets) {
1446  if (Nodes.empty())
1447  continue;
1448 
1449  unsigned Delay = Nodes.getLatency();
1450  unsigned Distance = 1;
1451 
1452  // ii = ceil(delay / distance)
1453  unsigned CurMII = (Delay + Distance - 1) / Distance;
1454  Nodes.setRecMII(CurMII);
1455  if (CurMII > RecMII)
1456  RecMII = CurMII;
1457  }
1458 
1459  return RecMII;
1460 }
1461 
1462 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1463 /// but we do this to find the circuits, and then change them back.
1464 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1466  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1467  SUnit *SU = &SUnits[i];
1468  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1469  IP != EP; ++IP) {
1470  if (IP->getKind() != SDep::Anti)
1471  continue;
1472  DepsAdded.push_back(std::make_pair(SU, *IP));
1473  }
1474  }
1475  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1476  E = DepsAdded.end();
1477  I != E; ++I) {
1478  // Remove this anti dependency and add one in the reverse direction.
1479  SUnit *SU = I->first;
1480  SDep &D = I->second;
1481  SUnit *TargetSU = D.getSUnit();
1482  unsigned Reg = D.getReg();
1483  unsigned Lat = D.getLatency();
1484  SU->removePred(D);
1485  SDep Dep(SU, SDep::Anti, Reg);
1486  Dep.setLatency(Lat);
1487  TargetSU->addPred(Dep);
1488  }
1489 }
1490 
1491 /// Create the adjacency structure of the nodes in the graph.
1492 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1493  SwingSchedulerDAG *DAG) {
1494  BitVector Added(SUnits.size());
1495  DenseMap<int, int> OutputDeps;
1496  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1497  Added.reset();
1498  // Add any successor to the adjacency matrix and exclude duplicates.
1499  for (auto &SI : SUnits[i].Succs) {
1500  // Only create a back-edge on the first and last nodes of a dependence
1501  // chain. This records any chains and adds them later.
1502  if (SI.getKind() == SDep::Output) {
1503  int N = SI.getSUnit()->NodeNum;
1504  int BackEdge = i;
1505  auto Dep = OutputDeps.find(BackEdge);
1506  if (Dep != OutputDeps.end()) {
1507  BackEdge = Dep->second;
1508  OutputDeps.erase(Dep);
1509  }
1510  OutputDeps[N] = BackEdge;
1511  }
1512  // Do not process a boundary node and a back-edge is processed only
1513  // if it goes to a Phi.
1514  if (SI.getSUnit()->isBoundaryNode() ||
1515  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1516  continue;
1517  int N = SI.getSUnit()->NodeNum;
1518  if (!Added.test(N)) {
1519  AdjK[i].push_back(N);
1520  Added.set(N);
1521  }
1522  }
1523  // A chain edge between a store and a load is treated as a back-edge in the
1524  // adjacency matrix.
1525  for (auto &PI : SUnits[i].Preds) {
1526  if (!SUnits[i].getInstr()->mayStore() ||
1527  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1528  continue;
1529  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1530  int N = PI.getSUnit()->NodeNum;
1531  if (!Added.test(N)) {
1532  AdjK[i].push_back(N);
1533  Added.set(N);
1534  }
1535  }
1536  }
1537  }
1538  // Add back-eges in the adjacency matrix for the output dependences.
1539  for (auto &OD : OutputDeps)
1540  if (!Added.test(OD.second)) {
1541  AdjK[OD.first].push_back(OD.second);
1542  Added.set(OD.second);
1543  }
1544 }
1545 
1546 /// Identify an elementary circuit in the dependence graph starting at the
1547 /// specified node.
1548 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1549  bool HasBackedge) {
1550  SUnit *SV = &SUnits[V];
1551  bool F = false;
1552  Stack.insert(SV);
1553  Blocked.set(V);
1554 
1555  for (auto W : AdjK[V]) {
1556  if (NumPaths > MaxPaths)
1557  break;
1558  if (W < S)
1559  continue;
1560  if (W == S) {
1561  if (!HasBackedge)
1562  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1563  F = true;
1564  ++NumPaths;
1565  break;
1566  } else if (!Blocked.test(W)) {
1567  if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
1568  F = true;
1569  }
1570  }
1571 
1572  if (F)
1573  unblock(V);
1574  else {
1575  for (auto W : AdjK[V]) {
1576  if (W < S)
1577  continue;
1578  if (B[W].count(SV) == 0)
1579  B[W].insert(SV);
1580  }
1581  }
1582  Stack.pop_back();
1583  return F;
1584 }
1585 
1586 /// Unblock a node in the circuit finding algorithm.
1587 void SwingSchedulerDAG::Circuits::unblock(int U) {
1588  Blocked.reset(U);
1589  SmallPtrSet<SUnit *, 4> &BU = B[U];
1590  while (!BU.empty()) {
1592  assert(SI != BU.end() && "Invalid B set.");
1593  SUnit *W = *SI;
1594  BU.erase(W);
1595  if (Blocked.test(W->NodeNum))
1596  unblock(W->NodeNum);
1597  }
1598 }
1599 
1600 /// Identify all the elementary circuits in the dependence graph using
1601 /// Johnson's circuit algorithm.
1602 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1603  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1604  // but we do this to find the circuits, and then change them back.
1605  swapAntiDependences(SUnits);
1606 
1607  Circuits Cir(SUnits);
1608  // Create the adjacency structure.
1609  Cir.createAdjacencyStructure(this);
1610  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1611  Cir.reset();
1612  Cir.circuit(i, i, NodeSets);
1613  }
1614 
1615  // Change the dependences back so that we've created a DAG again.
1616  swapAntiDependences(SUnits);
1617 }
1618 
1619 /// Return true for DAG nodes that we ignore when computing the cost functions.
1620 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1621 /// in the calculation of the ASAP, ALAP, etc functions.
1622 static bool ignoreDependence(const SDep &D, bool isPred) {
1623  if (D.isArtificial())
1624  return true;
1625  return D.getKind() == SDep::Anti && isPred;
1626 }
1627 
1628 /// Compute several functions need to order the nodes for scheduling.
1629 /// ASAP - Earliest time to schedule a node.
1630 /// ALAP - Latest time to schedule a node.
1631 /// MOV - Mobility function, difference between ALAP and ASAP.
1632 /// D - Depth of each node.
1633 /// H - Height of each node.
1634 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1635  ScheduleInfo.resize(SUnits.size());
1636 
1637  LLVM_DEBUG({
1639  E = Topo.end();
1640  I != E; ++I) {
1641  SUnit *SU = &SUnits[*I];
1642  SU->dump(this);
1643  }
1644  });
1645 
1646  int maxASAP = 0;
1647  // Compute ASAP and ZeroLatencyDepth.
1649  E = Topo.end();
1650  I != E; ++I) {
1651  int asap = 0;
1652  int zeroLatencyDepth = 0;
1653  SUnit *SU = &SUnits[*I];
1654  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1655  EP = SU->Preds.end();
1656  IP != EP; ++IP) {
1657  SUnit *pred = IP->getSUnit();
1658  if (IP->getLatency() == 0)
1659  zeroLatencyDepth =
1660  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1661  if (ignoreDependence(*IP, true))
1662  continue;
1663  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1664  getDistance(pred, SU, *IP) * MII));
1665  }
1666  maxASAP = std::max(maxASAP, asap);
1667  ScheduleInfo[*I].ASAP = asap;
1668  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1669  }
1670 
1671  // Compute ALAP, ZeroLatencyHeight, and MOV.
1673  E = Topo.rend();
1674  I != E; ++I) {
1675  int alap = maxASAP;
1676  int zeroLatencyHeight = 0;
1677  SUnit *SU = &SUnits[*I];
1678  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1679  ES = SU->Succs.end();
1680  IS != ES; ++IS) {
1681  SUnit *succ = IS->getSUnit();
1682  if (IS->getLatency() == 0)
1683  zeroLatencyHeight =
1684  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1685  if (ignoreDependence(*IS, true))
1686  continue;
1687  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1688  getDistance(SU, succ, *IS) * MII));
1689  }
1690 
1691  ScheduleInfo[*I].ALAP = alap;
1692  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1693  }
1694 
1695  // After computing the node functions, compute the summary for each node set.
1696  for (NodeSet &I : NodeSets)
1697  I.computeNodeSetInfo(this);
1698 
1699  LLVM_DEBUG({
1700  for (unsigned i = 0; i < SUnits.size(); i++) {
1701  dbgs() << "\tNode " << i << ":\n";
1702  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1703  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1704  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1705  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1706  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1707  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1708  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1709  }
1710  });
1711 }
1712 
1713 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1714 /// as the predecessors of the elements of NodeOrder that are not also in
1715 /// NodeOrder.
1716 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1718  const NodeSet *S = nullptr) {
1719  Preds.clear();
1720  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1721  I != E; ++I) {
1722  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1723  PI != PE; ++PI) {
1724  if (S && S->count(PI->getSUnit()) == 0)
1725  continue;
1726  if (ignoreDependence(*PI, true))
1727  continue;
1728  if (NodeOrder.count(PI->getSUnit()) == 0)
1729  Preds.insert(PI->getSUnit());
1730  }
1731  // Back-edges are predecessors with an anti-dependence.
1732  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1733  ES = (*I)->Succs.end();
1734  IS != ES; ++IS) {
1735  if (IS->getKind() != SDep::Anti)
1736  continue;
1737  if (S && S->count(IS->getSUnit()) == 0)
1738  continue;
1739  if (NodeOrder.count(IS->getSUnit()) == 0)
1740  Preds.insert(IS->getSUnit());
1741  }
1742  }
1743  return !Preds.empty();
1744 }
1745 
1746 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1747 /// as the successors of the elements of NodeOrder that are not also in
1748 /// NodeOrder.
1749 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1751  const NodeSet *S = nullptr) {
1752  Succs.clear();
1753  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1754  I != E; ++I) {
1755  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1756  SI != SE; ++SI) {
1757  if (S && S->count(SI->getSUnit()) == 0)
1758  continue;
1759  if (ignoreDependence(*SI, false))
1760  continue;
1761  if (NodeOrder.count(SI->getSUnit()) == 0)
1762  Succs.insert(SI->getSUnit());
1763  }
1764  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1765  PE = (*I)->Preds.end();
1766  PI != PE; ++PI) {
1767  if (PI->getKind() != SDep::Anti)
1768  continue;
1769  if (S && S->count(PI->getSUnit()) == 0)
1770  continue;
1771  if (NodeOrder.count(PI->getSUnit()) == 0)
1772  Succs.insert(PI->getSUnit());
1773  }
1774  }
1775  return !Succs.empty();
1776 }
1777 
1778 /// Return true if there is a path from the specified node to any of the nodes
1779 /// in DestNodes. Keep track and return the nodes in any path.
1780 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1781  SetVector<SUnit *> &DestNodes,
1782  SetVector<SUnit *> &Exclude,
1783  SmallPtrSet<SUnit *, 8> &Visited) {
1784  if (Cur->isBoundaryNode())
1785  return false;
1786  if (Exclude.count(Cur) != 0)
1787  return false;
1788  if (DestNodes.count(Cur) != 0)
1789  return true;
1790  if (!Visited.insert(Cur).second)
1791  return Path.count(Cur) != 0;
1792  bool FoundPath = false;
1793  for (auto &SI : Cur->Succs)
1794  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1795  for (auto &PI : Cur->Preds)
1796  if (PI.getKind() == SDep::Anti)
1797  FoundPath |=
1798  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1799  if (FoundPath)
1800  Path.insert(Cur);
1801  return FoundPath;
1802 }
1803 
1804 /// Return true if Set1 is a subset of Set2.
1805 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1806  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1807  if (Set2.count(*I) == 0)
1808  return false;
1809  return true;
1810 }
1811 
1812 /// Compute the live-out registers for the instructions in a node-set.
1813 /// The live-out registers are those that are defined in the node-set,
1814 /// but not used. Except for use operands of Phis.
1816  NodeSet &NS) {
1818  MachineRegisterInfo &MRI = MF.getRegInfo();
1820  SmallSet<unsigned, 4> Uses;
1821  for (SUnit *SU : NS) {
1822  const MachineInstr *MI = SU->getInstr();
1823  if (MI->isPHI())
1824  continue;
1825  for (const MachineOperand &MO : MI->operands())
1826  if (MO.isReg() && MO.isUse()) {
1827  unsigned Reg = MO.getReg();
1829  Uses.insert(Reg);
1830  else if (MRI.isAllocatable(Reg))
1831  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1832  Uses.insert(*Units);
1833  }
1834  }
1835  for (SUnit *SU : NS)
1836  for (const MachineOperand &MO : SU->getInstr()->operands())
1837  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1838  unsigned Reg = MO.getReg();
1840  if (!Uses.count(Reg))
1841  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1843  } else if (MRI.isAllocatable(Reg)) {
1844  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1845  if (!Uses.count(*Units))
1846  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1848  }
1849  }
1850  RPTracker.addLiveRegs(LiveOutRegs);
1851 }
1852 
1853 /// A heuristic to filter nodes in recurrent node-sets if the register
1854 /// pressure of a set is too high.
1855 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1856  for (auto &NS : NodeSets) {
1857  // Skip small node-sets since they won't cause register pressure problems.
1858  if (NS.size() <= 2)
1859  continue;
1860  IntervalPressure RecRegPressure;
1861  RegPressureTracker RecRPTracker(RecRegPressure);
1862  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1863  computeLiveOuts(MF, RecRPTracker, NS);
1864  RecRPTracker.closeBottom();
1865 
1866  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1867  llvm::sort(SUnits.begin(), SUnits.end(),
1868  [](const SUnit *A, const SUnit *B) {
1869  return A->NodeNum > B->NodeNum;
1870  });
1871 
1872  for (auto &SU : SUnits) {
1873  // Since we're computing the register pressure for a subset of the
1874  // instructions in a block, we need to set the tracker for each
1875  // instruction in the node-set. The tracker is set to the instruction
1876  // just after the one we're interested in.
1878  RecRPTracker.setPos(std::next(CurInstI));
1879 
1880  RegPressureDelta RPDelta;
1881  ArrayRef<PressureChange> CriticalPSets;
1882  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1883  CriticalPSets,
1884  RecRegPressure.MaxSetPressure);
1885  if (RPDelta.Excess.isValid()) {
1886  LLVM_DEBUG(
1887  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1888  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1889  << ":" << RPDelta.Excess.getUnitInc());
1890  NS.setExceedPressure(SU);
1891  break;
1892  }
1893  RecRPTracker.recede();
1894  }
1895  }
1896 }
1897 
1898 /// A heuristic to colocate node sets that have the same set of
1899 /// successors.
1900 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1901  unsigned Colocate = 0;
1902  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1903  NodeSet &N1 = NodeSets[i];
1905  if (N1.empty() || !succ_L(N1, S1))
1906  continue;
1907  for (int j = i + 1; j < e; ++j) {
1908  NodeSet &N2 = NodeSets[j];
1909  if (N1.compareRecMII(N2) != 0)
1910  continue;
1912  if (N2.empty() || !succ_L(N2, S2))
1913  continue;
1914  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1915  N1.setColocate(++Colocate);
1916  N2.setColocate(Colocate);
1917  break;
1918  }
1919  }
1920  }
1921 }
1922 
1923 /// Check if the existing node-sets are profitable. If not, then ignore the
1924 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1925 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1926 /// then it's best to try to schedule all instructions together instead of
1927 /// starting with the recurrent node-sets.
1928 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1929  // Look for loops with a large MII.
1930  if (MII < 17)
1931  return;
1932  // Check if the node-set contains only a simple add recurrence.
1933  for (auto &NS : NodeSets) {
1934  if (NS.getRecMII() > 2)
1935  return;
1936  if (NS.getMaxDepth() > MII)
1937  return;
1938  }
1939  NodeSets.clear();
1940  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1941  return;
1942 }
1943 
1944 /// Add the nodes that do not belong to a recurrence set into groups
1945 /// based upon connected componenets.
1946 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1947  SetVector<SUnit *> NodesAdded;
1948  SmallPtrSet<SUnit *, 8> Visited;
1949  // Add the nodes that are on a path between the previous node sets and
1950  // the current node set.
1951  for (NodeSet &I : NodeSets) {
1953  // Add the nodes from the current node set to the previous node set.
1954  if (succ_L(I, N)) {
1955  SetVector<SUnit *> Path;
1956  for (SUnit *NI : N) {
1957  Visited.clear();
1958  computePath(NI, Path, NodesAdded, I, Visited);
1959  }
1960  if (!Path.empty())
1961  I.insert(Path.begin(), Path.end());
1962  }
1963  // Add the nodes from the previous node set to the current node set.
1964  N.clear();
1965  if (succ_L(NodesAdded, N)) {
1966  SetVector<SUnit *> Path;
1967  for (SUnit *NI : N) {
1968  Visited.clear();
1969  computePath(NI, Path, I, NodesAdded, Visited);
1970  }
1971  if (!Path.empty())
1972  I.insert(Path.begin(), Path.end());
1973  }
1974  NodesAdded.insert(I.begin(), I.end());
1975  }
1976 
1977  // Create a new node set with the connected nodes of any successor of a node
1978  // in a recurrent set.
1979  NodeSet NewSet;
1981  if (succ_L(NodesAdded, N))
1982  for (SUnit *I : N)
1983  addConnectedNodes(I, NewSet, NodesAdded);
1984  if (!NewSet.empty())
1985  NodeSets.push_back(NewSet);
1986 
1987  // Create a new node set with the connected nodes of any predecessor of a node
1988  // in a recurrent set.
1989  NewSet.clear();
1990  if (pred_L(NodesAdded, N))
1991  for (SUnit *I : N)
1992  addConnectedNodes(I, NewSet, NodesAdded);
1993  if (!NewSet.empty())
1994  NodeSets.push_back(NewSet);
1995 
1996  // Create new nodes sets with the connected nodes any remaining node that
1997  // has no predecessor.
1998  for (unsigned i = 0; i < SUnits.size(); ++i) {
1999  SUnit *SU = &SUnits[i];
2000  if (NodesAdded.count(SU) == 0) {
2001  NewSet.clear();
2002  addConnectedNodes(SU, NewSet, NodesAdded);
2003  if (!NewSet.empty())
2004  NodeSets.push_back(NewSet);
2005  }
2006  }
2007 }
2008 
2009 /// Add the node to the set, and add all is its connected nodes to the set.
2010 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2011  SetVector<SUnit *> &NodesAdded) {
2012  NewSet.insert(SU);
2013  NodesAdded.insert(SU);
2014  for (auto &SI : SU->Succs) {
2015  SUnit *Successor = SI.getSUnit();
2016  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
2017  addConnectedNodes(Successor, NewSet, NodesAdded);
2018  }
2019  for (auto &PI : SU->Preds) {
2020  SUnit *Predecessor = PI.getSUnit();
2021  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
2022  addConnectedNodes(Predecessor, NewSet, NodesAdded);
2023  }
2024 }
2025 
2026 /// Return true if Set1 contains elements in Set2. The elements in common
2027 /// are returned in a different container.
2028 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
2029  SmallSetVector<SUnit *, 8> &Result) {
2030  Result.clear();
2031  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
2032  SUnit *SU = Set1[i];
2033  if (Set2.count(SU) != 0)
2034  Result.insert(SU);
2035  }
2036  return !Result.empty();
2037 }
2038 
2039 /// Merge the recurrence node sets that have the same initial node.
2040 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2041  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2042  ++I) {
2043  NodeSet &NI = *I;
2044  for (NodeSetType::iterator J = I + 1; J != E;) {
2045  NodeSet &NJ = *J;
2046  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
2047  if (NJ.compareRecMII(NI) > 0)
2048  NI.setRecMII(NJ.getRecMII());
2049  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
2050  ++NII)
2051  I->insert(*NII);
2052  NodeSets.erase(J);
2053  E = NodeSets.end();
2054  } else {
2055  ++J;
2056  }
2057  }
2058  }
2059 }
2060 
2061 /// Remove nodes that have been scheduled in previous NodeSets.
2062 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2063  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2064  ++I)
2065  for (NodeSetType::iterator J = I + 1; J != E;) {
2066  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2067 
2068  if (J->empty()) {
2069  NodeSets.erase(J);
2070  E = NodeSets.end();
2071  } else {
2072  ++J;
2073  }
2074  }
2075 }
2076 
2077 /// Compute an ordered list of the dependence graph nodes, which
2078 /// indicates the order that the nodes will be scheduled. This is a
2079 /// two-level algorithm. First, a partial order is created, which
2080 /// consists of a list of sets ordered from highest to lowest priority.
2081 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2083  NodeOrder.clear();
2084 
2085  for (auto &Nodes : NodeSets) {
2086  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2087  OrderKind Order;
2089  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2090  R.insert(N.begin(), N.end());
2091  Order = BottomUp;
2092  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
2093  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2094  R.insert(N.begin(), N.end());
2095  Order = TopDown;
2096  LLVM_DEBUG(dbgs() << " Top down (succs) ");
2097  } else if (isIntersect(N, Nodes, R)) {
2098  // If some of the successors are in the existing node-set, then use the
2099  // top-down ordering.
2100  Order = TopDown;
2101  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
2102  } else if (NodeSets.size() == 1) {
2103  for (auto &N : Nodes)
2104  if (N->Succs.size() == 0)
2105  R.insert(N);
2106  Order = BottomUp;
2107  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
2108  } else {
2109  // Find the node with the highest ASAP.
2110  SUnit *maxASAP = nullptr;
2111  for (SUnit *SU : Nodes) {
2112  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
2113  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2114  maxASAP = SU;
2115  }
2116  R.insert(maxASAP);
2117  Order = BottomUp;
2118  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
2119  }
2120 
2121  while (!R.empty()) {
2122  if (Order == TopDown) {
2123  // Choose the node with the maximum height. If more than one, choose
2124  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
2125  // choose the node with the lowest MOV.
2126  while (!R.empty()) {
2127  SUnit *maxHeight = nullptr;
2128  for (SUnit *I : R) {
2129  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2130  maxHeight = I;
2131  else if (getHeight(I) == getHeight(maxHeight) &&
2132  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
2133  maxHeight = I;
2134  else if (getHeight(I) == getHeight(maxHeight) &&
2135  getZeroLatencyHeight(I) ==
2136  getZeroLatencyHeight(maxHeight) &&
2137  getMOV(I) < getMOV(maxHeight))
2138  maxHeight = I;
2139  }
2140  NodeOrder.insert(maxHeight);
2141  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
2142  R.remove(maxHeight);
2143  for (const auto &I : maxHeight->Succs) {
2144  if (Nodes.count(I.getSUnit()) == 0)
2145  continue;
2146  if (NodeOrder.count(I.getSUnit()) != 0)
2147  continue;
2148  if (ignoreDependence(I, false))
2149  continue;
2150  R.insert(I.getSUnit());
2151  }
2152  // Back-edges are predecessors with an anti-dependence.
2153  for (const auto &I : maxHeight->Preds) {
2154  if (I.getKind() != SDep::Anti)
2155  continue;
2156  if (Nodes.count(I.getSUnit()) == 0)
2157  continue;
2158  if (NodeOrder.count(I.getSUnit()) != 0)
2159  continue;
2160  R.insert(I.getSUnit());
2161  }
2162  }
2163  Order = BottomUp;
2164  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2166  if (pred_L(NodeOrder, N, &Nodes))
2167  R.insert(N.begin(), N.end());
2168  } else {
2169  // Choose the node with the maximum depth. If more than one, choose
2170  // the node with the maximum ZeroLatencyDepth. If still more than one,
2171  // choose the node with the lowest MOV.
2172  while (!R.empty()) {
2173  SUnit *maxDepth = nullptr;
2174  for (SUnit *I : R) {
2175  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2176  maxDepth = I;
2177  else if (getDepth(I) == getDepth(maxDepth) &&
2178  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
2179  maxDepth = I;
2180  else if (getDepth(I) == getDepth(maxDepth) &&
2181  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
2182  getMOV(I) < getMOV(maxDepth))
2183  maxDepth = I;
2184  }
2185  NodeOrder.insert(maxDepth);
2186  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
2187  R.remove(maxDepth);
2188  if (Nodes.isExceedSU(maxDepth)) {
2189  Order = TopDown;
2190  R.clear();
2191  R.insert(Nodes.getNode(0));
2192  break;
2193  }
2194  for (const auto &I : maxDepth->Preds) {
2195  if (Nodes.count(I.getSUnit()) == 0)
2196  continue;
2197  if (NodeOrder.count(I.getSUnit()) != 0)
2198  continue;
2199  R.insert(I.getSUnit());
2200  }
2201  // Back-edges are predecessors with an anti-dependence.
2202  for (const auto &I : maxDepth->Succs) {
2203  if (I.getKind() != SDep::Anti)
2204  continue;
2205  if (Nodes.count(I.getSUnit()) == 0)
2206  continue;
2207  if (NodeOrder.count(I.getSUnit()) != 0)
2208  continue;
2209  R.insert(I.getSUnit());
2210  }
2211  }
2212  Order = TopDown;
2213  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2215  if (succ_L(NodeOrder, N, &Nodes))
2216  R.insert(N.begin(), N.end());
2217  }
2218  }
2219  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2220  }
2221 
2222  LLVM_DEBUG({
2223  dbgs() << "Node order: ";
2224  for (SUnit *I : NodeOrder)
2225  dbgs() << " " << I->NodeNum << " ";
2226  dbgs() << "\n";
2227  });
2228 }
2229 
2230 /// Process the nodes in the computed order and create the pipelined schedule
2231 /// of the instructions, if possible. Return true if a schedule is found.
2232 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2233  if (NodeOrder.empty())
2234  return false;
2235 
2236  bool scheduleFound = false;
2237  // Keep increasing II until a valid schedule is found.
2238  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2239  Schedule.reset();
2240  Schedule.setInitiationInterval(II);
2241  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2242 
2243  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2244  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2245  do {
2246  SUnit *SU = *NI;
2247 
2248  // Compute the schedule time for the instruction, which is based
2249  // upon the scheduled time for any predecessors/successors.
2250  int EarlyStart = INT_MIN;
2251  int LateStart = INT_MAX;
2252  // These values are set when the size of the schedule window is limited
2253  // due to chain dependences.
2254  int SchedEnd = INT_MAX;
2255  int SchedStart = INT_MIN;
2256  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2257  II, this);
2258  LLVM_DEBUG({
2259  dbgs() << "Inst (" << SU->NodeNum << ") ";
2260  SU->getInstr()->dump();
2261  dbgs() << "\n";
2262  });
2263  LLVM_DEBUG({
2264  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2265  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2266  });
2267 
2268  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2269  SchedStart > LateStart)
2270  scheduleFound = false;
2271  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2272  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2273  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2274  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2275  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2276  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2277  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2278  SchedEnd =
2279  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2280  // When scheduling a Phi it is better to start at the late cycle and go
2281  // backwards. The default order may insert the Phi too far away from
2282  // its first dependence.
2283  if (SU->getInstr()->isPHI())
2284  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2285  else
2286  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2287  } else {
2288  int FirstCycle = Schedule.getFirstCycle();
2289  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2290  FirstCycle + getASAP(SU) + II - 1, II);
2291  }
2292  // Even if we find a schedule, make sure the schedule doesn't exceed the
2293  // allowable number of stages. We keep trying if this happens.
2294  if (scheduleFound)
2295  if (SwpMaxStages > -1 &&
2296  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2297  scheduleFound = false;
2298 
2299  LLVM_DEBUG({
2300  if (!scheduleFound)
2301  dbgs() << "\tCan't schedule\n";
2302  });
2303  } while (++NI != NE && scheduleFound);
2304 
2305  // If a schedule is found, check if it is a valid schedule too.
2306  if (scheduleFound)
2307  scheduleFound = Schedule.isValidSchedule(this);
2308  }
2309 
2310  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2311 
2312  if (scheduleFound)
2313  Schedule.finalizeSchedule(this);
2314  else
2315  Schedule.reset();
2316 
2317  return scheduleFound && Schedule.getMaxStageCount() > 0;
2318 }
2319 
2320 /// Given a schedule for the loop, generate a new version of the loop,
2321 /// and replace the old version. This function generates a prolog
2322 /// that contains the initial iterations in the pipeline, and kernel
2323 /// loop, and the epilogue that contains the code for the final
2324 /// iterations.
2325 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2326  // Create a new basic block for the kernel and add it to the CFG.
2327  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2328 
2329  unsigned MaxStageCount = Schedule.getMaxStageCount();
2330 
2331  // Remember the registers that are used in different stages. The index is
2332  // the iteration, or stage, that the instruction is scheduled in. This is
2333  // a map between register names in the original block and the names created
2334  // in each stage of the pipelined loop.
2335  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2336  InstrMapTy InstrMap;
2337 
2339  // Generate the prolog instructions that set up the pipeline.
2340  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2341  MF.insert(BB->getIterator(), KernelBB);
2342 
2343  // Rearrange the instructions to generate the new, pipelined loop,
2344  // and update register names as needed.
2345  for (int Cycle = Schedule.getFirstCycle(),
2346  LastCycle = Schedule.getFinalCycle();
2347  Cycle <= LastCycle; ++Cycle) {
2348  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2349  // This inner loop schedules each instruction in the cycle.
2350  for (SUnit *CI : CycleInstrs) {
2351  if (CI->getInstr()->isPHI())
2352  continue;
2353  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2354  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2355  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2356  KernelBB->push_back(NewMI);
2357  InstrMap[NewMI] = CI->getInstr();
2358  }
2359  }
2360 
2361  // Copy any terminator instructions to the new kernel, and update
2362  // names as needed.
2363  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2364  E = BB->instr_end();
2365  I != E; ++I) {
2366  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2367  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2368  KernelBB->push_back(NewMI);
2369  InstrMap[NewMI] = &*I;
2370  }
2371 
2372  KernelBB->transferSuccessors(BB);
2373  KernelBB->replaceSuccessor(BB, KernelBB);
2374 
2375  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2376  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2377  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2378  InstrMap, MaxStageCount, MaxStageCount, false);
2379 
2380  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2381 
2383  // Generate the epilog instructions to complete the pipeline.
2384  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2385  PrologBBs);
2386 
2387  // We need this step because the register allocation doesn't handle some
2388  // situations well, so we insert copies to help out.
2389  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2390 
2391  // Remove dead instructions due to loop induction variables.
2392  removeDeadInstructions(KernelBB, EpilogBBs);
2393 
2394  // Add branches between prolog and epilog blocks.
2395  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2396 
2397  // Remove the original loop since it's no longer referenced.
2398  for (auto &I : *BB)
2400  BB->clear();
2401  BB->eraseFromParent();
2402 
2403  delete[] VRMap;
2404 }
2405 
2406 /// Generate the pipeline prolog code.
2407 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2408  MachineBasicBlock *KernelBB,
2409  ValueMapTy *VRMap,
2410  MBBVectorTy &PrologBBs) {
2411  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2412  assert(PreheaderBB != nullptr &&
2413  "Need to add code to handle loops w/o preheader");
2414  MachineBasicBlock *PredBB = PreheaderBB;
2415  InstrMapTy InstrMap;
2416 
2417  // Generate a basic block for each stage, not including the last stage,
2418  // which will be generated in the kernel. Each basic block may contain
2419  // instructions from multiple stages/iterations.
2420  for (unsigned i = 0; i < LastStage; ++i) {
2421  // Create and insert the prolog basic block prior to the original loop
2422  // basic block. The original loop is removed later.
2423  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2424  PrologBBs.push_back(NewBB);
2425  MF.insert(BB->getIterator(), NewBB);
2426  NewBB->transferSuccessors(PredBB);
2427  PredBB->addSuccessor(NewBB);
2428  PredBB = NewBB;
2429 
2430  // Generate instructions for each appropriate stage. Process instructions
2431  // in original program order.
2432  for (int StageNum = i; StageNum >= 0; --StageNum) {
2433  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2434  BBE = BB->getFirstTerminator();
2435  BBI != BBE; ++BBI) {
2436  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2437  if (BBI->isPHI())
2438  continue;
2439  MachineInstr *NewMI =
2440  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2441  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2442  VRMap);
2443  NewBB->push_back(NewMI);
2444  InstrMap[NewMI] = &*BBI;
2445  }
2446  }
2447  }
2448  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2449  LLVM_DEBUG({
2450  dbgs() << "prolog:\n";
2451  NewBB->dump();
2452  });
2453  }
2454 
2455  PredBB->replaceSuccessor(BB, KernelBB);
2456 
2457  // Check if we need to remove the branch from the preheader to the original
2458  // loop, and replace it with a branch to the new loop.
2459  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2460  if (numBranches) {
2462  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2463  }
2464 }
2465 
2466 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2467 /// that were started in either the prolog or the kernel. We create a basic
2468 /// block for each stage that needs to complete.
2469 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2470  MachineBasicBlock *KernelBB,
2471  ValueMapTy *VRMap,
2472  MBBVectorTy &EpilogBBs,
2473  MBBVectorTy &PrologBBs) {
2474  // We need to change the branch from the kernel to the first epilog block, so
2475  // this call to analyze branch uses the kernel rather than the original BB.
2476  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2478  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2479  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2480  if (checkBranch)
2481  return;
2482 
2483  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2484  if (*LoopExitI == KernelBB)
2485  ++LoopExitI;
2486  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2487  MachineBasicBlock *LoopExitBB = *LoopExitI;
2488 
2489  MachineBasicBlock *PredBB = KernelBB;
2490  MachineBasicBlock *EpilogStart = LoopExitBB;
2491  InstrMapTy InstrMap;
2492 
2493  // Generate a basic block for each stage, not including the last stage,
2494  // which was generated for the kernel. Each basic block may contain
2495  // instructions from multiple stages/iterations.
2496  int EpilogStage = LastStage + 1;
2497  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2499  EpilogBBs.push_back(NewBB);
2500  MF.insert(BB->getIterator(), NewBB);
2501 
2502  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2503  NewBB->addSuccessor(LoopExitBB);
2504 
2505  if (EpilogStart == LoopExitBB)
2506  EpilogStart = NewBB;
2507 
2508  // Add instructions to the epilog depending on the current block.
2509  // Process instructions in original program order.
2510  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2511  for (auto &BBI : *BB) {
2512  if (BBI.isPHI())
2513  continue;
2514  MachineInstr *In = &BBI;
2515  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2516  // Instructions with memoperands in the epilog are updated with
2517  // conservative values.
2518  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2519  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2520  NewBB->push_back(NewMI);
2521  InstrMap[NewMI] = In;
2522  }
2523  }
2524  }
2525  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2526  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2527  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2528  InstrMap, LastStage, EpilogStage, i == 1);
2529  PredBB = NewBB;
2530 
2531  LLVM_DEBUG({
2532  dbgs() << "epilog:\n";
2533  NewBB->dump();
2534  });
2535  }
2536 
2537  // Fix any Phi nodes in the loop exit block.
2538  for (MachineInstr &MI : *LoopExitBB) {
2539  if (!MI.isPHI())
2540  break;
2541  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2542  MachineOperand &MO = MI.getOperand(i);
2543  if (MO.getMBB() == BB)
2544  MO.setMBB(PredBB);
2545  }
2546  }
2547 
2548  // Create a branch to the new epilog from the kernel.
2549  // Remove the original branch and add a new branch to the epilog.
2550  TII->removeBranch(*KernelBB);
2551  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2552  // Add a branch to the loop exit.
2553  if (EpilogBBs.size() > 0) {
2554  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2556  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2557  }
2558 }
2559 
2560 /// Replace all uses of FromReg that appear outside the specified
2561 /// basic block with ToReg.
2562 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2563  MachineBasicBlock *MBB,
2564  MachineRegisterInfo &MRI,
2565  LiveIntervals &LIS) {
2566  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2567  E = MRI.use_end();
2568  I != E;) {
2569  MachineOperand &O = *I;
2570  ++I;
2571  if (O.getParent()->getParent() != MBB)
2572  O.setReg(ToReg);
2573  }
2574  if (!LIS.hasInterval(ToReg))
2575  LIS.createEmptyInterval(ToReg);
2576 }
2577 
2578 /// Return true if the register has a use that occurs outside the
2579 /// specified loop.
2580 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2581  MachineRegisterInfo &MRI) {
2583  E = MRI.use_end();
2584  I != E; ++I)
2585  if (I->getParent()->getParent() != BB)
2586  return true;
2587  return false;
2588 }
2589 
2590 /// Generate Phis for the specific block in the generated pipelined code.
2591 /// This function looks at the Phis from the original code to guide the
2592 /// creation of new Phis.
2593 void SwingSchedulerDAG::generateExistingPhis(
2595  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2596  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2597  bool IsLast) {
2598  // Compute the stage number for the initial value of the Phi, which
2599  // comes from the prolog. The prolog to use depends on to which kernel/
2600  // epilog that we're adding the Phi.
2601  unsigned PrologStage = 0;
2602  unsigned PrevStage = 0;
2603  bool InKernel = (LastStageNum == CurStageNum);
2604  if (InKernel) {
2605  PrologStage = LastStageNum - 1;
2606  PrevStage = CurStageNum;
2607  } else {
2608  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2609  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2610  }
2611 
2612  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2613  BBE = BB->getFirstNonPHI();
2614  BBI != BBE; ++BBI) {
2615  unsigned Def = BBI->getOperand(0).getReg();
2616 
2617  unsigned InitVal = 0;
2618  unsigned LoopVal = 0;
2619  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2620 
2621  unsigned PhiOp1 = 0;
2622  // The Phi value from the loop body typically is defined in the loop, but
2623  // not always. So, we need to check if the value is defined in the loop.
2624  unsigned PhiOp2 = LoopVal;
2625  if (VRMap[LastStageNum].count(LoopVal))
2626  PhiOp2 = VRMap[LastStageNum][LoopVal];
2627 
2628  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2629  int LoopValStage =
2630  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2631  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2632  if (NumStages == 0) {
2633  // We don't need to generate a Phi anymore, but we need to rename any uses
2634  // of the Phi value.
2635  unsigned NewReg = VRMap[PrevStage][LoopVal];
2636  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2637  Def, InitVal, NewReg);
2638  if (VRMap[CurStageNum].count(LoopVal))
2639  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2640  }
2641  // Adjust the number of Phis needed depending on the number of prologs left,
2642  // and the distance from where the Phi is first scheduled. The number of
2643  // Phis cannot exceed the number of prolog stages. Each stage can
2644  // potentially define two values.
2645  unsigned MaxPhis = PrologStage + 2;
2646  if (!InKernel && (int)PrologStage <= LoopValStage)
2647  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2648  unsigned NumPhis = std::min(NumStages, MaxPhis);
2649 
2650  unsigned NewReg = 0;
2651  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2652  // In the epilog, we may need to look back one stage to get the correct
2653  // Phi name because the epilog and prolog blocks execute the same stage.
2654  // The correct name is from the previous block only when the Phi has
2655  // been completely scheduled prior to the epilog, and Phi value is not
2656  // needed in multiple stages.
2657  int StageDiff = 0;
2658  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2659  NumPhis == 1)
2660  StageDiff = 1;
2661  // Adjust the computations below when the phi and the loop definition
2662  // are scheduled in different stages.
2663  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2664  StageDiff = StageScheduled - LoopValStage;
2665  for (unsigned np = 0; np < NumPhis; ++np) {
2666  // If the Phi hasn't been scheduled, then use the initial Phi operand
2667  // value. Otherwise, use the scheduled version of the instruction. This
2668  // is a little complicated when a Phi references another Phi.
2669  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2670  PhiOp1 = InitVal;
2671  // Check if the Phi has already been scheduled in a prolog stage.
2672  else if (PrologStage >= AccessStage + StageDiff + np &&
2673  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2674  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2675  // Check if the Phi has already been scheduled, but the loop intruction
2676  // is either another Phi, or doesn't occur in the loop.
2677  else if (PrologStage >= AccessStage + StageDiff + np) {
2678  // If the Phi references another Phi, we need to examine the other
2679  // Phi to get the correct value.
2680  PhiOp1 = LoopVal;
2681  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2682  int Indirects = 1;
2683  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2684  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2685  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2686  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2687  else
2688  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2689  InstOp1 = MRI.getVRegDef(PhiOp1);
2690  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2691  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2692  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2693  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2694  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2695  break;
2696  }
2697  ++Indirects;
2698  }
2699  } else
2700  PhiOp1 = InitVal;
2701  // If this references a generated Phi in the kernel, get the Phi operand
2702  // from the incoming block.
2703  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2704  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2705  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2706 
2707  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2708  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2709  // In the epilog, a map lookup is needed to get the value from the kernel,
2710  // or previous epilog block. How is does this depends on if the
2711  // instruction is scheduled in the previous block.
2712  if (!InKernel) {
2713  int StageDiffAdj = 0;
2714  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2715  StageDiffAdj = StageScheduled - LoopValStage;
2716  // Use the loop value defined in the kernel, unless the kernel
2717  // contains the last definition of the Phi.
2718  if (np == 0 && PrevStage == LastStageNum &&
2719  (StageScheduled != 0 || LoopValStage != 0) &&
2720  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2721  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2722  // Use the value defined by the Phi. We add one because we switch
2723  // from looking at the loop value to the Phi definition.
2724  else if (np > 0 && PrevStage == LastStageNum &&
2725  VRMap[PrevStage - np + 1].count(Def))
2726  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2727  // Use the loop value defined in the kernel.
2728  else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
2729  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2730  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2731  // Use the value defined by the Phi, unless we're generating the first
2732  // epilog and the Phi refers to a Phi in a different stage.
2733  else if (VRMap[PrevStage - np].count(Def) &&
2734  (!LoopDefIsPhi || PrevStage != LastStageNum))
2735  PhiOp2 = VRMap[PrevStage - np][Def];
2736  }
2737 
2738  // Check if we can reuse an existing Phi. This occurs when a Phi
2739  // references another Phi, and the other Phi is scheduled in an
2740  // earlier stage. We can try to reuse an existing Phi up until the last
2741  // stage of the current Phi.
2742  if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {
2743  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2744  int StageDiff = (StageScheduled - LoopValStage);
2745  LVNumStages -= StageDiff;
2746  // Make sure the loop value Phi has been processed already.
2747  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2748  NewReg = PhiOp2;
2749  unsigned ReuseStage = CurStageNum;
2750  if (Schedule.isLoopCarried(this, *PhiInst))
2751  ReuseStage -= LVNumStages;
2752  // Check if the Phi to reuse has been generated yet. If not, then
2753  // there is nothing to reuse.
2754  if (VRMap[ReuseStage - np].count(LoopVal)) {
2755  NewReg = VRMap[ReuseStage - np][LoopVal];
2756 
2757  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2758  &*BBI, Def, NewReg);
2759  // Update the map with the new Phi name.
2760  VRMap[CurStageNum - np][Def] = NewReg;
2761  PhiOp2 = NewReg;
2762  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2763  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2764 
2765  if (IsLast && np == NumPhis - 1)
2766  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2767  continue;
2768  }
2769  } else if (InKernel && StageDiff > 0 &&
2770  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2771  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2772  }
2773 
2774  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2775  NewReg = MRI.createVirtualRegister(RC);
2776 
2777  MachineInstrBuilder NewPhi =
2778  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2779  TII->get(TargetOpcode::PHI), NewReg);
2780  NewPhi.addReg(PhiOp1).addMBB(BB1);
2781  NewPhi.addReg(PhiOp2).addMBB(BB2);
2782  if (np == 0)
2783  InstrMap[NewPhi] = &*BBI;
2784 
2785  // We define the Phis after creating the new pipelined code, so
2786  // we need to rename the Phi values in scheduled instructions.
2787 
2788  unsigned PrevReg = 0;
2789  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2790  PrevReg = VRMap[PrevStage - np][LoopVal];
2791  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2792  Def, NewReg, PrevReg);
2793  // If the Phi has been scheduled, use the new name for rewriting.
2794  if (VRMap[CurStageNum - np].count(Def)) {
2795  unsigned R = VRMap[CurStageNum - np][Def];
2796  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2797  R, NewReg);
2798  }
2799 
2800  // Check if we need to rename any uses that occurs after the loop. The
2801  // register to replace depends on whether the Phi is scheduled in the
2802  // epilog.
2803  if (IsLast && np == NumPhis - 1)
2804  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2805 
2806  // In the kernel, a dependent Phi uses the value from this Phi.
2807  if (InKernel)
2808  PhiOp2 = NewReg;
2809 
2810  // Update the map with the new Phi name.
2811  VRMap[CurStageNum - np][Def] = NewReg;
2812  }
2813 
2814  while (NumPhis++ < NumStages) {
2815  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2816  &*BBI, Def, NewReg, 0);
2817  }
2818 
2819  // Check if we need to rename a Phi that has been eliminated due to
2820  // scheduling.
2821  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2822  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2823  }
2824 }
2825 
2826 /// Generate Phis for the specified block in the generated pipelined code.
2827 /// These are new Phis needed because the definition is scheduled after the
2828 /// use in the pipelined sequence.
2829 void SwingSchedulerDAG::generatePhis(
2831  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2832  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2833  bool IsLast) {
2834  // Compute the stage number that contains the initial Phi value, and
2835  // the Phi from the previous stage.
2836  unsigned PrologStage = 0;
2837  unsigned PrevStage = 0;
2838  unsigned StageDiff = CurStageNum - LastStageNum;
2839  bool InKernel = (StageDiff == 0);
2840  if (InKernel) {
2841  PrologStage = LastStageNum - 1;
2842  PrevStage = CurStageNum;
2843  } else {
2844  PrologStage = LastStageNum - StageDiff;
2845  PrevStage = LastStageNum + StageDiff - 1;
2846  }
2847 
2848  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2849  BBE = BB->instr_end();
2850  BBI != BBE; ++BBI) {
2851  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2852  MachineOperand &MO = BBI->getOperand(i);
2853  if (!MO.isReg() || !MO.isDef() ||
2855  continue;
2856 
2857  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2858  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2859  unsigned Def = MO.getReg();
2860  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2861  // An instruction scheduled in stage 0 and is used after the loop
2862  // requires a phi in the epilog for the last definition from either
2863  // the kernel or prolog.
2864  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2865  hasUseAfterLoop(Def, BB, MRI))
2866  NumPhis = 1;
2867  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2868  continue;
2869 
2870  unsigned PhiOp2 = VRMap[PrevStage][Def];
2871  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2872  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2873  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2874  // The number of Phis can't exceed the number of prolog stages. The
2875  // prolog stage number is zero based.
2876  if (NumPhis > PrologStage + 1 - StageScheduled)
2877  NumPhis = PrologStage + 1 - StageScheduled;
2878  for (unsigned np = 0; np < NumPhis; ++np) {
2879  unsigned PhiOp1 = VRMap[PrologStage][Def];
2880  if (np <= PrologStage)
2881  PhiOp1 = VRMap[PrologStage - np][Def];
2882  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2883  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2884  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2885  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2886  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2887  }
2888  if (!InKernel)
2889  PhiOp2 = VRMap[PrevStage - np][Def];
2890 
2891  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2892  unsigned NewReg = MRI.createVirtualRegister(RC);
2893 
2894  MachineInstrBuilder NewPhi =
2895  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2896  TII->get(TargetOpcode::PHI), NewReg);
2897  NewPhi.addReg(PhiOp1).addMBB(BB1);
2898  NewPhi.addReg(PhiOp2).addMBB(BB2);
2899  if (np == 0)
2900  InstrMap[NewPhi] = &*BBI;
2901 
2902  // Rewrite uses and update the map. The actions depend upon whether
2903  // we generating code for the kernel or epilog blocks.
2904  if (InKernel) {
2905  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2906  &*BBI, PhiOp1, NewReg);
2907  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2908  &*BBI, PhiOp2, NewReg);
2909 
2910  PhiOp2 = NewReg;
2911  VRMap[PrevStage - np - 1][Def] = NewReg;
2912  } else {
2913  VRMap[CurStageNum - np][Def] = NewReg;
2914  if (np == NumPhis - 1)
2915  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2916  &*BBI, Def, NewReg);
2917  }
2918  if (IsLast && np == NumPhis - 1)
2919  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2920  }
2921  }
2922  }
2923 }
2924 
2925 /// Remove instructions that generate values with no uses.
2926 /// Typically, these are induction variable operations that generate values
2927 /// used in the loop itself. A dead instruction has a definition with
2928 /// no uses, or uses that occur in the original loop only.
2929 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2930  MBBVectorTy &EpilogBBs) {
2931  // For each epilog block, check that the value defined by each instruction
2932  // is used. If not, delete it.
2933  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2934  MBE = EpilogBBs.rend();
2935  MBB != MBE; ++MBB)
2936  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2937  ME = (*MBB)->instr_rend();
2938  MI != ME;) {
2939  // From DeadMachineInstructionElem. Don't delete inline assembly.
2940  if (MI->isInlineAsm()) {
2941  ++MI;
2942  continue;
2943  }
2944  bool SawStore = false;
2945  // Check if it's safe to remove the instruction due to side effects.
2946  // We can, and want to, remove Phis here.
2947  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2948  ++MI;
2949  continue;
2950  }
2951  bool used = true;
2952  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2953  MOE = MI->operands_end();
2954  MOI != MOE; ++MOI) {
2955  if (!MOI->isReg() || !MOI->isDef())
2956  continue;
2957  unsigned reg = MOI->getReg();
2958  // Assume physical registers are used, unless they are marked dead.
2960  used = !MOI->isDead();
2961  if (used)
2962  break;
2963  continue;
2964  }
2965  unsigned realUses = 0;
2966  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2967  EI = MRI.use_end();
2968  UI != EI; ++UI) {
2969  // Check if there are any uses that occur only in the original
2970  // loop. If so, that's not a real use.
2971  if (UI->getParent()->getParent() != BB) {
2972  realUses++;
2973  used = true;
2974  break;
2975  }
2976  }
2977  if (realUses > 0)
2978  break;
2979  used = false;
2980  }
2981  if (!used) {
2983  MI++->eraseFromParent();
2984  continue;
2985  }
2986  ++MI;
2987  }
2988  // In the kernel block, check if we can remove a Phi that generates a value
2989  // used in an instruction removed in the epilog block.
2990  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2991  BBE = KernelBB->getFirstNonPHI();
2992  BBI != BBE;) {
2993  MachineInstr *MI = &*BBI;
2994  ++BBI;
2995  unsigned reg = MI->getOperand(0).getReg();
2996  if (MRI.use_begin(reg) == MRI.use_end()) {
2997  LIS.RemoveMachineInstrFromMaps(*MI);
2998  MI->eraseFromParent();
2999  }
3000  }
3001 }
3002 
3003 /// For loop carried definitions, we split the lifetime of a virtual register
3004 /// that has uses past the definition in the next iteration. A copy with a new
3005 /// virtual register is inserted before the definition, which helps with
3006 /// generating a better register assignment.
3007 ///
3008 /// v1 = phi(a, v2) v1 = phi(a, v2)
3009 /// v2 = phi(b, v3) v2 = phi(b, v3)
3010 /// v3 = .. v4 = copy v1
3011 /// .. = V1 v3 = ..
3012 /// .. = v4
3013 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
3014  MBBVectorTy &EpilogBBs,
3015  SMSchedule &Schedule) {
3017  for (auto &PHI : KernelBB->phis()) {
3018  unsigned Def = PHI.getOperand(0).getReg();
3019  // Check for any Phi definition that used as an operand of another Phi
3020  // in the same block.
3022  E = MRI.use_instr_end();
3023  I != E; ++I) {
3024  if (I->isPHI() && I->getParent() == KernelBB) {
3025  // Get the loop carried definition.
3026  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
3027  if (!LCDef)
3028  continue;
3029  MachineInstr *MI = MRI.getVRegDef(LCDef);
3030  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
3031  continue;
3032  // Search through the rest of the block looking for uses of the Phi
3033  // definition. If one occurs, then split the lifetime.
3034  unsigned SplitReg = 0;
3035  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
3036  KernelBB->instr_end()))
3037  if (BBJ.readsRegister(Def)) {
3038  // We split the lifetime when we find the first use.
3039  if (SplitReg == 0) {
3040  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
3041  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
3042  TII->get(TargetOpcode::COPY), SplitReg)
3043  .addReg(Def);
3044  }
3045  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
3046  }
3047  if (!SplitReg)
3048  continue;
3049  // Search through each of the epilog blocks for any uses to be renamed.
3050  for (auto &Epilog : EpilogBBs)
3051  for (auto &I : *Epilog)
3052  if (I.readsRegister(Def))
3053  I.substituteRegister(Def, SplitReg, 0, *TRI);
3054  break;
3055  }
3056  }
3057  }
3058 }
3059 
3060 /// Remove the incoming block from the Phis in a basic block.
3061 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
3062  for (MachineInstr &MI : *BB) {
3063  if (!MI.isPHI())
3064  break;
3065  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
3066  if (MI.getOperand(i + 1).getMBB() == Incoming) {
3067  MI.RemoveOperand(i + 1);
3068  MI.RemoveOperand(i);
3069  break;
3070  }
3071  }
3072 }
3073 
3074 /// Create branches from each prolog basic block to the appropriate epilog
3075 /// block. These edges are needed if the loop ends before reaching the
3076 /// kernel.
3077 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
3078  MachineBasicBlock *KernelBB,
3079  MBBVectorTy &EpilogBBs,
3080  SMSchedule &Schedule, ValueMapTy *VRMap) {
3081  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
3082  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
3083  MachineInstr *Cmp = Pass.LI.LoopCompare;
3084  MachineBasicBlock *LastPro = KernelBB;
3085  MachineBasicBlock *LastEpi = KernelBB;
3086 
3087  // Start from the blocks connected to the kernel and work "out"
3088  // to the first prolog and the last epilog blocks.
3090  unsigned MaxIter = PrologBBs.size() - 1;
3091  unsigned LC = UINT_MAX;
3092  unsigned LCMin = UINT_MAX;
3093  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3094  // Add branches to the prolog that go to the corresponding
3095  // epilog, and the fall-thru prolog/kernel block.
3096  MachineBasicBlock *Prolog = PrologBBs[j];
3097  MachineBasicBlock *Epilog = EpilogBBs[i];
3098  // We've executed one iteration, so decrement the loop count and check for
3099  // the loop end.
3101  // Check if the LOOP0 has already been removed. If so, then there is no need
3102  // to reduce the trip count.
3103  if (LC != 0)
3104  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3105  MaxIter);
3106 
3107  // Record the value of the first trip count, which is used to determine if
3108  // branches and blocks can be removed for constant trip counts.
3109  if (LCMin == UINT_MAX)
3110  LCMin = LC;
3111 
3112  unsigned numAdded = 0;
3114  Prolog->addSuccessor(Epilog);
3115  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3116  } else if (j >= LCMin) {
3117  Prolog->addSuccessor(Epilog);
3118  Prolog->removeSuccessor(LastPro);
3119  LastEpi->removeSuccessor(Epilog);
3120  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3121  removePhis(Epilog, LastEpi);
3122  // Remove the blocks that are no longer referenced.
3123  if (LastPro != LastEpi) {
3124  LastEpi->clear();
3125  LastEpi->eraseFromParent();
3126  }
3127  LastPro->clear();
3128  LastPro->eraseFromParent();
3129  } else {
3130  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3131  removePhis(Epilog, Prolog);
3132  }
3133  LastPro = Prolog;
3134  LastEpi = Epilog;
3136  E = Prolog->instr_rend();
3137  I != E && numAdded > 0; ++I, --numAdded)
3138  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3139  }
3140 }
3141 
3142 /// Return true if we can compute the amount the instruction changes
3143 /// during each iteration. Set Delta to the amount of the change.
3144 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3146  unsigned BaseReg;
3147  int64_t Offset;
3148  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
3149  return false;
3150 
3151  MachineRegisterInfo &MRI = MF.getRegInfo();
3152  // Check if there is a Phi. If so, get the definition in the loop.
3153  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3154  if (BaseDef && BaseDef->isPHI()) {
3155  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3156  BaseDef = MRI.getVRegDef(BaseReg);
3157  }
3158  if (!BaseDef)
3159  return false;
3160 
3161  int D = 0;
3162  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3163  return false;
3164 
3165  Delta = D;
3166  return true;
3167 }
3168 
3169 /// Update the memory operand with a new offset when the pipeliner
3170 /// generates a new copy of the instruction that refers to a
3171 /// different memory location.
3172 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3173  MachineInstr &OldMI, unsigned Num) {
3174  if (Num == 0)
3175  return;
3176  // If the instruction has memory operands, then adjust the offset
3177  // when the instruction appears in different stages.
3178  unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
3179  if (NumRefs == 0)
3180  return;
3181  MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
3182  unsigned Refs = 0;
3183  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3184  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3185  (!MMO->getValue())) {
3186  NewMemRefs[Refs++] = MMO;
3187  continue;
3188  }
3189  unsigned Delta;
3190  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
3191  int64_t AdjOffset = Delta * Num;
3192  NewMemRefs[Refs++] =
3193  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
3194  } else {
3195  NewMI.dropMemRefs();
3196  return;
3197  }
3198  }
3199  NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
3200 }
3201 
3202 /// Clone the instruction for the new pipelined loop and update the
3203 /// memory operands, if needed.
3204 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3205  unsigned CurStageNum,
3206  unsigned InstStageNum) {
3207  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3208  // Check for tied operands in inline asm instructions. This should be handled
3209  // elsewhere, but I'm not sure of the best solution.
3210  if (OldMI->isInlineAsm())
3211  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3212  const auto &MO = OldMI->getOperand(i);
3213  if (MO.isReg() && MO.isUse())
3214  break;
3215  unsigned UseIdx;
3216  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3217  NewMI->tieOperands(i, UseIdx);
3218  }
3219  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3220  return NewMI;
3221 }
3222 
3223 /// Clone the instruction for the new pipelined loop. If needed, this
3224 /// function updates the instruction using the values saved in the
3225 /// InstrChanges structure.
3226 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3227  unsigned CurStageNum,
3228  unsigned InstStageNum,
3229  SMSchedule &Schedule) {
3230  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3232  InstrChanges.find(getSUnit(OldMI));
3233  if (It != InstrChanges.end()) {
3234  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3235  unsigned BasePos, OffsetPos;
3236  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3237  return nullptr;
3238  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3239  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
3240  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3241  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3242  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3243  }
3244  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3245  return NewMI;
3246 }
3247 
3248 /// Update the machine instruction with new virtual registers. This
3249 /// function may change the defintions and/or uses.
3250 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3251  unsigned CurStageNum,
3252  unsigned InstrStageNum,
3253  SMSchedule &Schedule,
3254  ValueMapTy *VRMap) {
3255  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3256  MachineOperand &MO = NewMI->getOperand(i);
3258  continue;
3259  unsigned reg = MO.getReg();
3260  if (MO.isDef()) {
3261  // Create a new virtual register for the definition.
3262  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3263  unsigned NewReg = MRI.createVirtualRegister(RC);
3264  MO.setReg(NewReg);
3265  VRMap[CurStageNum][reg] = NewReg;
3266  if (LastDef)
3267  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3268  } else if (MO.isUse()) {
3269  MachineInstr *Def = MRI.getVRegDef(reg);
3270  // Compute the stage that contains the last definition for instruction.
3271  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3272  unsigned StageNum = CurStageNum;
3273  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3274  // Compute the difference in stages between the defintion and the use.
3275  unsigned StageDiff = (InstrStageNum - DefStageNum);
3276  // Make an adjustment to get the last definition.
3277  StageNum -= StageDiff;
3278  }
3279  if (VRMap[StageNum].count(reg))
3280  MO.setReg(VRMap[StageNum][reg]);
3281  }
3282  }
3283 }
3284 
3285 /// Return the instruction in the loop that defines the register.
3286 /// If the definition is a Phi, then follow the Phi operand to
3287 /// the instruction in the loop.
3288 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3290  MachineInstr *Def = MRI.getVRegDef(Reg);
3291  while (Def->isPHI()) {
3292  if (!Visited.insert(Def).second)
3293  break;
3294  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3295  if (Def->getOperand(i + 1).getMBB() == BB) {
3296  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3297  break;
3298  }
3299  }
3300  return Def;
3301 }
3302 
3303 /// Return the new name for the value from the previous stage.
3304 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3305  unsigned LoopVal, unsigned LoopStage,
3306  ValueMapTy *VRMap,
3307  MachineBasicBlock *BB) {
3308  unsigned PrevVal = 0;
3309  if (StageNum > PhiStage) {
3310  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3311  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3312  // The name is defined in the previous stage.
3313  PrevVal = VRMap[StageNum - 1][LoopVal];
3314  else if (VRMap[StageNum].count(LoopVal))
3315  // The previous name is defined in the current stage when the instruction
3316  // order is swapped.
3317  PrevVal = VRMap[StageNum][LoopVal];
3318  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3319  // The loop value hasn't yet been scheduled.
3320  PrevVal = LoopVal;
3321  else if (StageNum == PhiStage + 1)
3322  // The loop value is another phi, which has not been scheduled.
3323  PrevVal = getInitPhiReg(*LoopInst, BB);
3324  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3325  // The loop value is another phi, which has been scheduled.
3326  PrevVal =
3327  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3328  LoopStage, VRMap, BB);
3329  }
3330  return PrevVal;
3331 }
3332 
3333 /// Rewrite the Phi values in the specified block to use the mappings
3334 /// from the initial operand. Once the Phi is scheduled, we switch
3335 /// to using the loop value instead of the Phi value, so those names
3336 /// do not need to be rewritten.
3337 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3338  unsigned StageNum,
3339  SMSchedule &Schedule,
3340  ValueMapTy *VRMap,
3341  InstrMapTy &InstrMap) {
3342  for (auto &PHI : BB->phis()) {
3343  unsigned InitVal = 0;
3344  unsigned LoopVal = 0;
3345  getPhiRegs(PHI, BB, InitVal, LoopVal);
3346  unsigned PhiDef = PHI.getOperand(0).getReg();
3347 
3348  unsigned PhiStage =
3349  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3350  unsigned LoopStage =
3351  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3352  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3353  if (NumPhis > StageNum)
3354  NumPhis = StageNum;
3355  for (unsigned np = 0; np <= NumPhis; ++np) {
3356  unsigned NewVal =
3357  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3358  if (!NewVal)
3359  NewVal = InitVal;
3360  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3361  PhiDef, NewVal);
3362  }
3363  }
3364 }
3365 
3366 /// Rewrite a previously scheduled instruction to use the register value
3367 /// from the new instruction. Make sure the instruction occurs in the
3368 /// basic block, and we don't change the uses in the new instruction.
3369 void SwingSchedulerDAG::rewriteScheduledInstr(
3370  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3371  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3372  unsigned NewReg, unsigned PrevReg) {
3373  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3374  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3375  // Rewrite uses that have been scheduled already to use the new
3376  // Phi register.
3377  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3378  EI = MRI.use_end();
3379  UI != EI;) {
3380  MachineOperand &UseOp = *UI;
3381  MachineInstr *UseMI = UseOp.getParent();
3382  ++UI;
3383  if (UseMI->getParent() != BB)
3384  continue;
3385  if (UseMI->isPHI()) {
3386  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3387  continue;
3388  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3389  continue;
3390  }
3391  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3392  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3393  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3394  int StageSched = Schedule.stageScheduled(OrigMISU);
3395  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3396  unsigned ReplaceReg = 0;
3397  // This is the stage for the scheduled instruction.
3398  if (StagePhi == StageSched && Phi->isPHI()) {
3399  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3400  if (PrevReg && InProlog)
3401  ReplaceReg = PrevReg;
3402  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3403  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3404  ReplaceReg = PrevReg;
3405  else
3406  ReplaceReg = NewReg;
3407  }
3408  // The scheduled instruction occurs before the scheduled Phi, and the
3409  // Phi is not loop carried.
3410  if (!InProlog && StagePhi + 1 == StageSched &&
3411  !Schedule.isLoopCarried(this, *Phi))
3412  ReplaceReg = NewReg;
3413  if (StagePhi > StageSched && Phi->isPHI())
3414  ReplaceReg = NewReg;
3415  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3416  ReplaceReg = NewReg;
3417  if (ReplaceReg) {
3418  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3419  UseOp.setReg(ReplaceReg);
3420  }
3421  }
3422 }
3423 
3424 /// Check if we can change the instruction to use an offset value from the
3425 /// previous iteration. If so, return true and set the base and offset values
3426 /// so that we can rewrite the load, if necessary.
3427 /// v1 = Phi(v0, v3)
3428 /// v2 = load v1, 0
3429 /// v3 = post_store v1, 4, x
3430 /// This function enables the load to be rewritten as v2 = load v3, 4.
3431 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3432  unsigned &BasePos,
3433  unsigned &OffsetPos,
3434  unsigned &NewBase,
3435  int64_t &Offset) {
3436  // Get the load instruction.
3437  if (TII->isPostIncrement(*MI))
3438  return false;
3439  unsigned BasePosLd, OffsetPosLd;
3440  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3441  return false;
3442  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3443 
3444  // Look for the Phi instruction.
3445  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3446  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3447  if (!Phi || !Phi->isPHI())
3448  return false;
3449  // Get the register defined in the loop block.
3450  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3451  if (!PrevReg)
3452  return false;
3453 
3454  // Check for the post-increment load/store instruction.
3455  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3456  if (!PrevDef || PrevDef == MI)
3457  return false;
3458 
3459  if (!TII->isPostIncrement(*PrevDef))
3460  return false;
3461 
3462  unsigned BasePos1 = 0, OffsetPos1 = 0;
3463  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3464  return false;
3465 
3466  // Make sure that the instructions do not access the same memory location in
3467  // the next iteration.
3468  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3469  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3470  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3471  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3472  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3473  MF.DeleteMachineInstr(NewMI);
3474  if (!Disjoint)
3475  return false;
3476 
3477  // Set the return value once we determine that we return true.
3478  BasePos = BasePosLd;
3479  OffsetPos = OffsetPosLd;
3480  NewBase = PrevReg;
3481  Offset = StoreOffset;
3482  return true;
3483 }
3484 
3485 /// Apply changes to the instruction if needed. The changes are need
3486 /// to improve the scheduling and depend up on the final schedule.
3487 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3488  SMSchedule &Schedule) {
3489  SUnit *SU = getSUnit(MI);
3491  InstrChanges.find(SU);
3492  if (It != InstrChanges.end()) {
3493  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3494  unsigned BasePos, OffsetPos;
3495  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3496  return;
3497  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3498  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3499  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3500  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3501  int BaseStageNum = Schedule.stageScheduled(SU);
3502  int BaseCycleNum = Schedule.cycleScheduled(SU);
3503  if (BaseStageNum < DefStageNum) {
3504  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3505  int OffsetDiff = DefStageNum - BaseStageNum;
3506  if (DefCycleNum < BaseCycleNum) {
3507  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3508  if (OffsetDiff > 0)
3509  --OffsetDiff;
3510  }
3511  int64_t NewOffset =
3512  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3513  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3514  SU->setInstr(NewMI);
3515  MISUnitMap[NewMI] = SU;
3516  NewMIs.insert(NewMI);
3517  }
3518  }
3519 }
3520 
3521 /// Return true for an order or output dependence that is loop carried
3522 /// potentially. A dependence is loop carried if the destination defines a valu
3523 /// that may be used or defined by the source in a subsequent iteration.
3524 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
3525  bool isSucc) {
3526  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3527  Dep.isArtificial())
3528  return false;
3529 
3530  if (!SwpPruneLoopCarried)
3531  return true;
3532 
3533  if (Dep.getKind() == SDep::Output)
3534  return true;
3535 
3536  MachineInstr *SI = Source->getInstr();
3537  MachineInstr *DI = Dep.getSUnit()->getInstr();
3538  if (!isSucc)
3539  std::swap(SI, DI);
3540  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3541 
3542  // Assume ordered loads and stores may have a loop carried dependence.
3543  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3545  return true;
3546 
3547  // Only chain dependences between a load and store can be loop carried.
3548  if (!DI->mayStore() || !SI->mayLoad())
3549  return false;
3550 
3551  unsigned DeltaS, DeltaD;
3552  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3553  return true;
3554 
3555  unsigned BaseRegS, BaseRegD;
3556  int64_t OffsetS, OffsetD;
3558  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
3559  !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
3560  return true;
3561 
3562  if (BaseRegS != BaseRegD)
3563  return true;
3564 
3565  // Check that the base register is incremented by a constant value for each
3566  // iteration.
3567  MachineInstr *Def = MRI.getVRegDef(BaseRegS);
3568  if (!Def || !Def->isPHI())
3569  return true;
3570  unsigned InitVal = 0;
3571  unsigned LoopVal = 0;
3572  getPhiRegs(*Def, BB, InitVal, LoopVal);
3573  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3574  int D = 0;
3575  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3576  return true;
3577 
3578  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3579  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3580 
3581  // This is the main test, which checks the offset values and the loop
3582  // increment value to determine if the accesses may be loop carried.
3583  if (OffsetS >= OffsetD)
3584  return OffsetS + AccessSizeS > DeltaS;
3585  else
3586  return OffsetD + AccessSizeD > DeltaD;
3587 
3588  return true;
3589 }
3590 
3591 void SwingSchedulerDAG::postprocessDAG() {
3592  for (auto &M : Mutations)
3593  M->apply(this);
3594 }
3595 
3596 /// Try to schedule the node at the specified StartCycle and continue
3597 /// until the node is schedule or the EndCycle is reached. This function
3598 /// returns true if the node is scheduled. This routine may search either
3599 /// forward or backward for a place to insert the instruction based upon
3600 /// the relative values of StartCycle and EndCycle.
3601 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3602  bool forward = true;
3603  if (StartCycle > EndCycle)
3604  forward = false;
3605 
3606  // The terminating condition depends on the direction.
3607  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3608  for (int curCycle = StartCycle; curCycle != termCycle;
3609  forward ? ++curCycle : --curCycle) {
3610 
3611  // Add the already scheduled instructions at the specified cycle to the DFA.
3612  Resources->clearResources();
3613  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3614  checkCycle <= LastCycle; checkCycle += II) {
3615  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3616 
3617  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3618  E = cycleInstrs.end();
3619  I != E; ++I) {
3620  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3621  continue;
3622  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3623  "These instructions have already been scheduled.");
3624  Resources->reserveResources(*(*I)->getInstr());
3625  }
3626  }
3627  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3628  Resources->canReserveResources(*SU->getInstr())) {
3629  LLVM_DEBUG({
3630  dbgs() << "\tinsert at cycle " << curCycle << " ";
3631  SU->getInstr()->dump();
3632  });
3633 
3634  ScheduledInstrs[curCycle].push_back(SU);
3635  InstrToCycle.insert(std::make_pair(SU, curCycle));
3636  if (curCycle > LastCycle)
3637  LastCycle = curCycle;
3638  if (curCycle < FirstCycle)
3639  FirstCycle = curCycle;
3640  return true;
3641  }
3642  LLVM_DEBUG({
3643  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3644  SU->getInstr()->dump();
3645  });
3646  }
3647  return false;
3648 }
3649 
3650 // Return the cycle of the earliest scheduled instruction in the chain.
3651 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3652  SmallPtrSet<SUnit *, 8> Visited;
3653  SmallVector<SDep, 8> Worklist;
3654  Worklist.push_back(Dep);
3655  int EarlyCycle = INT_MAX;
3656  while (!Worklist.empty()) {
3657  const SDep &Cur = Worklist.pop_back_val();
3658  SUnit *PrevSU = Cur.getSUnit();
3659  if (Visited.count(PrevSU))
3660  continue;
3661  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3662  if (it == InstrToCycle.end())
3663  continue;
3664  EarlyCycle = std::min(EarlyCycle, it->second);
3665  for (const auto &PI : PrevSU->Preds)
3666  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3667  Worklist.push_back(PI);
3668  Visited.insert(PrevSU);
3669  }
3670  return EarlyCycle;
3671 }
3672 
3673 // Return the cycle of the latest scheduled instruction in the chain.
3674 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3675  SmallPtrSet<SUnit *, 8> Visited;
3676  SmallVector<SDep, 8> Worklist;
3677  Worklist.push_back(Dep);
3678  int LateCycle = INT_MIN;
3679  while (!Worklist.empty()) {
3680  const SDep &Cur = Worklist.pop_back_val();
3681  SUnit *SuccSU = Cur.getSUnit();
3682  if (Visited.count(SuccSU))
3683  continue;
3684  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3685  if (it == InstrToCycle.end())
3686  continue;
3687  LateCycle = std::max(LateCycle, it->second);
3688  for (const auto &SI : SuccSU->Succs)
3689  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3690  Worklist.push_back(SI);
3691  Visited.insert(SuccSU);
3692  }
3693  return LateCycle;
3694 }
3695 
3696 /// If an instruction has a use that spans multiple iterations, then
3697 /// return true. These instructions are characterized by having a back-ege
3698 /// to a Phi, which contains a reference to another Phi.
3699 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3700  for (auto &P : SU->Preds)
3701  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3702  for (auto &S : P.getSUnit()->Succs)
3703  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3704  return P.getSUnit();
3705  return nullptr;
3706 }
3707 
3708 /// Compute the scheduling start slot for the instruction. The start slot
3709 /// depends on any predecessor or successor nodes scheduled already.
3710 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3711  int *MinEnd, int *MaxStart, int II,
3712  SwingSchedulerDAG *DAG) {
3713  // Iterate over each instruction that has been scheduled already. The start
3714  // slot computation depends on whether the previously scheduled instruction
3715  // is a predecessor or successor of the specified instruction.
3716  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3717 
3718  // Iterate over each instruction in the current cycle.
3719  for (SUnit *I : getInstructions(cycle)) {
3720  // Because we're processing a DAG for the dependences, we recognize
3721  // the back-edge in recurrences by anti dependences.
3722  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3723  const SDep &Dep = SU->Preds[i];
3724  if (Dep.getSUnit() == I) {
3725  if (!DAG->isBackedge(SU, Dep)) {
3726  int EarlyStart = cycle + Dep.getLatency() -
3727  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3728  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3729  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3730  int End = earliestCycleInChain(Dep) + (II - 1);
3731  *MinEnd = std::min(*MinEnd, End);
3732  }
3733  } else {
3734  int LateStart = cycle - Dep.getLatency() +
3735  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3736  *MinLateStart = std::min(*MinLateStart, LateStart);
3737  }
3738  }
3739  // For instruction that requires multiple iterations, make sure that
3740  // the dependent instruction is not scheduled past the definition.
3741  SUnit *BE = multipleIterations(I, DAG);
3742  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3743  !SU->isPred(I))
3744  *MinLateStart = std::min(*MinLateStart, cycle);
3745  }
3746  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3747  if (SU->Succs[i].getSUnit() == I) {
3748  const SDep &Dep = SU->Succs[i];
3749  if (!DAG->isBackedge(SU, Dep)) {
3750  int LateStart = cycle - Dep.getLatency() +
3751  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3752  *MinLateStart = std::min(*MinLateStart, LateStart);
3753  if (DAG->isLoopCarriedDep(SU, Dep)) {
3754  int Start = latestCycleInChain(Dep) + 1 - II;
3755  *MaxStart = std::max(*MaxStart, Start);
3756  }
3757  } else {
3758  int EarlyStart = cycle + Dep.getLatency() -
3759  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3760  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3761  }
3762  }
3763  }
3764  }
3765  }
3766 }
3767 
3768 /// Order the instructions within a cycle so that the definitions occur
3769 /// before the uses. Returns true if the instruction is added to the start
3770 /// of the list, or false if added to the end.
3771 void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3772  std::deque<SUnit *> &Insts) {
3773  MachineInstr *MI = SU->getInstr();
3774  bool OrderBeforeUse = false;
3775  bool OrderAfterDef = false;
3776  bool OrderBeforeDef = false;
3777  unsigned MoveDef = 0;
3778  unsigned MoveUse = 0;
3779  int StageInst1 = stageScheduled(SU);
3780 
3781  unsigned Pos = 0;
3782  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3783  ++I, ++Pos) {
3784  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3785  MachineOperand &MO = MI->getOperand(i);
3787  continue;
3788 
3789  unsigned Reg = MO.getReg();
3790  unsigned BasePos, OffsetPos;
3791  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3792  if (MI->getOperand(BasePos).getReg() == Reg)
3793  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3794  Reg = NewReg;
3795  bool Reads, Writes;
3796  std::tie(Reads, Writes) =
3797  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3798  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3799  OrderBeforeUse = true;
3800  if (MoveUse == 0)
3801  MoveUse = Pos;
3802  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3803  // Add the instruction after the scheduled instruction.
3804  OrderAfterDef = true;
3805  MoveDef = Pos;
3806  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3807  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3808  OrderBeforeUse = true;
3809  if (MoveUse == 0)
3810  MoveUse = Pos;
3811  } else {
3812  OrderAfterDef = true;
3813  MoveDef = Pos;
3814  }
3815  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3816  OrderBeforeUse = true;
3817  if (MoveUse == 0)
3818  MoveUse = Pos;
3819  if (MoveUse != 0) {
3820  OrderAfterDef = true;
3821  MoveDef = Pos - 1;
3822  }
3823  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3824  // Add the instruction before the scheduled instruction.
3825  OrderBeforeUse = true;
3826  if (MoveUse == 0)
3827  MoveUse = Pos;
3828  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3829  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3830  if (MoveUse == 0) {
3831  OrderBeforeDef = true;
3832  MoveUse = Pos;
3833  }
3834  }
3835  }
3836  // Check for order dependences between instructions. Make sure the source
3837  // is ordered before the destination.
3838  for (auto &S : SU->Succs) {
3839  if (S.getSUnit() != *I)
3840  continue;
3841  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3842  OrderBeforeUse = true;
3843  if (Pos < MoveUse)
3844  MoveUse = Pos;
3845  }
3846  }
3847  for (auto &P : SU->Preds) {
3848  if (P.getSUnit() != *I)
3849  continue;
3850  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3851  OrderAfterDef = true;
3852  MoveDef = Pos;
3853  }
3854  }
3855  }
3856 
3857  // A circular dependence.
3858  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3859  OrderBeforeUse = false;
3860 
3861  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3862  // to a loop-carried dependence.
3863  if (OrderBeforeDef)
3864  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3865 
3866  // The uncommon case when the instruction order needs to be updated because
3867  // there is both a use and def.
3868  if (OrderBeforeUse && OrderAfterDef) {
3869  SUnit *UseSU = Insts.at(MoveUse);
3870  SUnit *DefSU = Insts.at(MoveDef);
3871  if (MoveUse > MoveDef) {
3872  Insts.erase(Insts.begin() + MoveUse);
3873  Insts.erase(Insts.begin() + MoveDef);
3874  } else {
3875  Insts.erase(Insts.begin() + MoveDef);
3876  Insts.erase(Insts.begin() + MoveUse);
3877  }
3878  orderDependence(SSD, UseSU, Insts);
3879  orderDependence(SSD, SU, Insts);
3880  orderDependence(SSD, DefSU, Insts);
3881  return;
3882  }
3883  // Put the new instruction first if there is a use in the list. Otherwise,
3884  // put it at the end of the list.
3885  if (OrderBeforeUse)
3886  Insts.push_front(SU);
3887  else
3888  Insts.push_back(SU);
3889 }
3890 
3891 /// Return true if the scheduled Phi has a loop carried operand.
3892 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3893  if (!Phi.isPHI())
3894  return false;
3895  assert(Phi.isPHI() && "Expecting a Phi.");
3896  SUnit *DefSU = SSD->getSUnit(&Phi);
3897  unsigned DefCycle = cycleScheduled(DefSU);
3898  int DefStage = stageScheduled(DefSU);
3899 
3900  unsigned InitVal = 0;
3901  unsigned LoopVal = 0;
3902  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3903  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3904  if (!UseSU)
3905  return true;
3906  if (UseSU->getInstr()->isPHI())
3907  return true;
3908  unsigned LoopCycle = cycleScheduled(UseSU);
3909  int LoopStage = stageScheduled(UseSU);
3910  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3911 }
3912 
3913 /// Return true if the instruction is a definition that is loop carried
3914 /// and defines the use on the next iteration.
3915 /// v1 = phi(v2, v3)
3916 /// (Def) v3 = op v1
3917 /// (MO) = v1
3918 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3919 /// register.
3920 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3922  if (!MO.isReg())
3923  return false;
3924  if (Def->isPHI())
3925  return false;
3926  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3927  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3928  return false;
3929  if (!isLoopCarried(SSD, *Phi))
3930  return false;
3931  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3932  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3933  MachineOperand &DMO = Def->getOperand(i);
3934  if (!DMO.isReg() || !DMO.isDef())
3935  continue;
3936  if (DMO.getReg() == LoopReg)
3937  return true;
3938  }
3939  return false;
3940 }
3941 
3942 // Check if the generated schedule is valid. This function checks if
3943 // an instruction that uses a physical register is scheduled in a
3944 // different stage than the definition. The pipeliner does not handle
3945 // physical register values that may cross a basic block boundary.
3946 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3947  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3948  SUnit &SU = SSD->SUnits[i];
3949  if (!SU.hasPhysRegDefs)
3950  continue;
3951  int StageDef = stageScheduled(&SU);
3952  assert(StageDef != -1 && "Instruction should have been scheduled.");
3953  for (auto &SI : SU.Succs)
3954  if (SI.isAssignedRegDep())
3955  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3956  if (stageScheduled(SI.getSUnit()) != StageDef)
3957  return false;
3958  }
3959  return true;
3960 }
3961 
3962 /// A property of the node order in swing-modulo-scheduling is
3963 /// that for nodes outside circuits the following holds:
3964 /// none of them is scheduled after both a successor and a
3965 /// predecessor.
3966 /// The method below checks whether the property is met.
3967 /// If not, debug information is printed and statistics information updated.
3968 /// Note that we do not use an assert statement.
3969 /// The reason is that although an invalid node oder may prevent
3970 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3971 /// it does not lead to the generation of incorrect code.
3972 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3973 
3974  // a sorted vector that maps each SUnit to its index in the NodeOrder
3975  typedef std::pair<SUnit *, unsigned> UnitIndex;
3976  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3977 
3978  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3979  Indices.push_back(std::make_pair(NodeOrder[i], i));
3980 
3981  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3982  return std::get<0>(i1) < std::get<0>(i2);
3983  };
3984 
3985  // sort, so that we can perform a binary search
3986  llvm::sort(Indices.begin(), Indices.end(), CompareKey);
3987 
3988  bool Valid = true;
3989  (void)Valid;
3990  // for each SUnit in the NodeOrder, check whether
3991  // it appears after both a successor and a predecessor
3992  // of the SUnit. If this is the case, and the SUnit
3993  // is not part of circuit, then the NodeOrder is not
3994  // valid.
3995  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3996  SUnit *SU = NodeOrder[i];
3997  unsigned Index = i;
3998 
3999  bool PredBefore = false;
4000  bool SuccBefore = false;
4001 
4002  SUnit *Succ;
4003  SUnit *Pred;
4004  (void)Succ;
4005  (void)Pred;
4006 
4007  for (SDep &PredEdge : SU->Preds) {
4008  SUnit *PredSU = PredEdge.getSUnit();
4009  unsigned PredIndex =
4010  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4011  std::make_pair(PredSU, 0), CompareKey));
4012  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
4013  PredBefore = true;
4014  Pred = PredSU;
4015  break;
4016  }
4017  }
4018 
4019  for (SDep &SuccEdge : SU->Succs) {
4020  SUnit *SuccSU = SuccEdge.getSUnit();
4021  unsigned SuccIndex =
4022  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4023  std::make_pair(SuccSU, 0), CompareKey));
4024  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
4025  SuccBefore = true;
4026  Succ = SuccSU;
4027  break;
4028  }
4029  }
4030 
4031  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
4032  // instructions in circuits are allowed to be scheduled
4033  // after both a successor and predecessor.
4034  bool InCircuit = std::any_of(
4035  Circuits.begin(), Circuits.end(),
4036  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
4037  if (InCircuit)
4038  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
4039  else {
4040  Valid = false;
4041  NumNodeOrderIssues++;
4042  LLVM_DEBUG(dbgs() << "Predecessor ";);
4043  }
4044  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
4045  << " are scheduled before node " << SU->NodeNum
4046  << "\n";);
4047  }
4048  }
4049 
4050  LLVM_DEBUG({
4051  if (!Valid)
4052  dbgs() << "Invalid node order found!\n";
4053  });
4054 }
4055 
4056 /// Attempt to fix the degenerate cases when the instruction serialization
4057 /// causes the register lifetimes to overlap. For example,
4058 /// p' = store_pi(p, b)
4059 /// = load p, offset
4060 /// In this case p and p' overlap, which means that two registers are needed.
4061 /// Instead, this function changes the load to use p' and updates the offset.
4062 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
4063  unsigned OverlapReg = 0;
4064  unsigned NewBaseReg = 0;
4065  for (SUnit *SU : Instrs) {
4066  MachineInstr *MI = SU->getInstr();
4067  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4068  const MachineOperand &MO = MI->getOperand(i);
4069  // Look for an instruction that uses p. The instruction occurs in the
4070  // same cycle but occurs later in the serialized order.
4071  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
4072  // Check that the instruction appears in the InstrChanges structure,
4073  // which contains instructions that can have the offset updated.
4075  InstrChanges.find(SU);
4076  if (It != InstrChanges.end()) {
4077  unsigned BasePos, OffsetPos;
4078  // Update the base register and adjust the offset.
4079  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
4080  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
4081  NewMI->getOperand(BasePos).setReg(NewBaseReg);
4082  int64_t NewOffset =
4083  MI->getOperand(OffsetPos).getImm() - It->second.second;
4084  NewMI->getOperand(OffsetPos).setImm(NewOffset);
4085  SU->setInstr(NewMI);
4086  MISUnitMap[NewMI] = SU;
4087  NewMIs.insert(NewMI);
4088  }
4089  }
4090  OverlapReg = 0;
4091  NewBaseReg = 0;
4092  break;
4093  }
4094  // Look for an instruction of the form p' = op(p), which uses and defines
4095  // two virtual registers that get allocated to the same physical register.
4096  unsigned TiedUseIdx = 0;
4097  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
4098  // OverlapReg is p in the example above.
4099  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
4100  // NewBaseReg is p' in the example above.
4101  NewBaseReg = MI->getOperand(i).getReg();
4102  break;
4103  }
4104  }
4105  }
4106 }
4107 
4108 /// After the schedule has been formed, call this function to combine
4109 /// the instructions from the different stages/cycles. That is, this
4110 /// function creates a schedule that represents a single iteration.
4111 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
4112  // Move all instructions to the first stage from later stages.
4113  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4114  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
4115  ++stage) {
4116  std::deque<SUnit *> &cycleInstrs =
4117  ScheduledInstrs[cycle + (stage * InitiationInterval)];
4118  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
4119  E = cycleInstrs.rend();
4120  I != E; ++I)
4121  ScheduledInstrs[cycle].push_front(*I);
4122  }
4123  }
4124  // Iterate over the definitions in each instruction, and compute the
4125  // stage difference for each use. Keep the maximum value.
4126  for (auto &I : InstrToCycle) {
4127  int DefStage = stageScheduled(I.first);
4128  MachineInstr *MI = I.first->getInstr();
4129  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4130  MachineOperand &Op = MI->getOperand(i);
4131  if (!Op.isReg() || !Op.isDef())
4132  continue;
4133 
4134  unsigned Reg = Op.getReg();
4135  unsigned MaxDiff = 0;
4136  bool PhiIsSwapped = false;
4137  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
4138  EI = MRI.use_end();
4139  UI != EI; ++UI) {
4140  MachineOperand &UseOp = *UI;
4141  MachineInstr *UseMI = UseOp.getParent();
4142  SUnit *SUnitUse = SSD->getSUnit(UseMI);
4143  int UseStage = stageScheduled(SUnitUse);
4144  unsigned Diff = 0;
4145  if (UseStage != -1 && UseStage >= DefStage)
4146  Diff = UseStage - DefStage;
4147  if (MI->isPHI()) {
4148  if (isLoopCarried(SSD, *MI))
4149  ++Diff;
4150  else
4151  PhiIsSwapped = true;
4152  }
4153  MaxDiff = std::max(Diff, MaxDiff);
4154  }
4155  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
4156  }
4157  }
4158 
4159  // Erase all the elements in the later stages. Only one iteration should
4160  // remain in the scheduled list, and it contains all the instructions.
4161  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
4162  ScheduledInstrs.erase(cycle);
4163 
4164  // Change the registers in instruction as specified in the InstrChanges
4165  // map. We need to use the new registers to create the correct order.
4166  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
4167  SUnit *SU = &SSD->SUnits[i];
4168  SSD->applyInstrChange(SU->getInstr(), *this);
4169  }
4170 
4171  // Reorder the instructions in each cycle to fix and improve the
4172  // generated code.
4173  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
4174  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
4175  std::deque<SUnit *> newOrderPhi;
4176  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4177  SUnit *SU = cycleInstrs[i];
4178  if (SU->getInstr()->isPHI())
4179  newOrderPhi.push_back(SU);
4180  }
4181  std::deque<SUnit *> newOrderI;
4182  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4183  SUnit *SU = cycleInstrs[i];
4184  if (!SU->getInstr()->isPHI())
4185  orderDependence(SSD, SU, newOrderI);
4186  }
4187  // Replace the old order with the new order.
4188  cycleInstrs.swap(newOrderPhi);
4189  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
4190  SSD->fixupRegisterOverlaps(cycleInstrs);
4191  }
4192 
4193  LLVM_DEBUG(dump(););
4194 }
4195 
4196 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4197 /// Print the schedule information to the given output.
4198 void SMSchedule::print(raw_ostream &os) const {
4199  // Iterate over each cycle.
4200  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4201  // Iterate over each instruction in the cycle.
4202  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
4203  for (SUnit *CI : cycleInstrs->second) {
4204  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
4205  os << "(" << CI->NodeNum << ") ";
4206  CI->getInstr()->print(os);
4207  os << "\n";
4208  }
4209  }
4210 }
4211 
4212 /// Utility function used for debugging to print the schedule.
4213 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
4214 #endif
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:755
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:250
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
BitVector & set()
Definition: BitVector.h:398
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:356
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:89
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:485
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:241
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void dump(const ScheduleDAG *G) const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:285
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:403
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isInlineAsm() const
Definition: MachineInstr.h:867
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:85
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:423
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
bool isPHI() const
Definition: MachineInstr.h:861
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:265
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:264
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:222
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:100
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:424
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:308
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
SlotIndexes pass.
Definition: SlotIndexes.h:331
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1004
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:148
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:974
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
TargetInstrInfo - Interface to description of machine instruction set.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
const Value * getValue() const
Return the base address of the memory access.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:577
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:566
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:672
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base register and byte offset of an instruction that reads/writes memory. ...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:431
BitVector & reset()
Definition: BitVector.h:439
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:915
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:948
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1382
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:50
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:859
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
print lazy value Lazy Value Info Printer Pass
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:512
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1032
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:416
void dropMemRefs()
Clear this MachineInstr&#39;s memory reference descriptor list.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:748
void clear()
Completely clear the SetVector.
Definition: SetVector.h:216
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:267
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:156
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1953
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:60
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
Basic Alias true
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:445
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:659
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:355
A vector that has set insertion semantics.
Definition: SetVector.h:41
static use_instr_iterator use_instr_end()
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:200
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1951
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
#define LLVM_DEBUG(X)
Definition: Debug.h:119
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr&#39;s memory reference descriptor list.
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:417
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:436