LLVM  4.0.0
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlaps loop iterations and exploits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
21 // Conference on Parallel Architectures and Compilation Techniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Champaign, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and to check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/MapVector.h"
66 #include "llvm/ADT/PriorityQueue.h"
67 #include "llvm/ADT/SetVector.h"
68 #include "llvm/ADT/SmallPtrSet.h"
69 #include "llvm/ADT/SmallSet.h"
70 #include "llvm/ADT/SmallVector.h"
71 #include "llvm/ADT/Statistic.h"
93 #include "llvm/IR/Attributes.h"
94 #include "llvm/IR/DebugLoc.h"
97 #include "llvm/PassRegistry.h"
98 #include "llvm/PassSupport.h"
100 #include "llvm/Support/Debug.h"
101 #include "llvm/Support/MathExtras.h"
106 #include <algorithm>
107 #include <cassert>
108 #include <climits>
109 #include <cstdint>
110 #include <deque>
111 #include <functional>
112 #include <iterator>
113 #include <map>
114 #include <tuple>
115 #include <utility>
116 #include <vector>
117 
118 using namespace llvm;
119 
// Debug/statistics category name used by this file.
120 #define DEBUG_TYPE "pipeliner"
121 
// Counters: NumTrytoPipeline is bumped once per loop that passes
// canPipelineLoop(); NumPipelined once per loop for which a pipelined loop
// is actually generated.
122 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
123 STATISTIC(NumPipelined, "Number of loops software pipelined");
124 
125 /// A command line option to turn software pipelining on or off (on by default).
126 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
128  cl::desc("Enable Software Pipelining"));
129 
130 /// A command line option to enable SWP at -Os. Functions carrying the
/// OptimizeForSize attribute are skipped unless this option is given.
131 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
132  cl::desc("Enable SWP at Os."), cl::Hidden,
133  cl::init(false));
134 
135 /// A command line argument that sets an upper bound on the minimum initiation
/// interval (MII): a loop whose computed MII exceeds this value is not
/// pipelined. A value of -1 disables the check.
136 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
137  cl::desc("Size limit for the the MII."),
138  cl::Hidden, cl::init(27));
139 
140 /// A command line argument to limit the number of stages in the pipeline.
/// A schedule needing more stages than this is discarded; a negative value
/// disables the check.
141 static cl::opt<int>
142  SwpMaxStages("pipeliner-max-stages",
143  cl::desc("Maximum stages allowed in the generated scheduled."),
144  cl::Hidden, cl::init(3));
145 
146 /// A command line option to disable the pruning of chain dependences due to
147 /// an unrelated Phi (pruning is on by default).
148 static cl::opt<bool>
149  SwpPruneDeps("pipeliner-prune-deps",
150  cl::desc("Prune dependences between unrelated Phi nodes."),
151  cl::Hidden, cl::init(true));
152 
153 /// A command line option to disable the pruning of loop carried order
154 /// dependences (pruning is on by default).
155 static cl::opt<bool>
156  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
157  cl::desc("Prune loop carried order dependences."),
158  cl::Hidden, cl::init(true));
159 
// Assert-enabled builds only: maximum number of loops scheduleLoop() will
// attempt before giving up. The default of -1 (any negative value) means no
// limit.
160 #ifndef NDEBUG
161 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
162 #endif
163 
// Testing aid: when set, schedule() forces the recurrence MII to 0 so only the
// resource MII is used. The code that consumes it warns that this can cause
// correctness problems.
164 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
165  cl::ReallyHidden, cl::init(false),
166  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
167 
168 namespace {
169 
170 class NodeSet;
171 class SMSchedule;
172 class SwingSchedulerDAG;
173 
174 /// The main class in the implementation of the target independent
175 /// software pipeliner pass.
///
/// The pass walks every loop nest of a machine function (innermost loops
/// first), checks whether each loop is a pipelining candidate and, if so,
/// hands it to a SwingSchedulerDAG for scheduling.
176 class MachinePipeliner : public MachineFunctionPass {
177 public:
  // Per-function state, refreshed at the start of runOnMachineFunction().
178  MachineFunction *MF = nullptr;
179  const MachineLoopInfo *MLI = nullptr;
180  const MachineDominatorTree *MDT = nullptr;
  // NOTE(review): unlike its neighbours this pointer has no default
  // initializer, and nothing in the visible code assigns it.
181  const InstrItineraryData *InstrItins;
182  const TargetInstrInfo *TII = nullptr;
183  RegisterClassInfo RegClassInfo;
184 
185 #ifndef NDEBUG
  /// Number of loops attempted so far; compared against -pipeliner-max.
186  static int NumTries;
187 #endif
188  /// Cache the target analysis information about the loop.
  /// The fields are reset and refilled by canPipelineLoop().
  /// NOTE(review): canPipelineLoop() also uses LI.BrCond, whose declaration
  /// is not visible in this listing.
189  struct LoopInfo {
190  MachineBasicBlock *TBB = nullptr;
191  MachineBasicBlock *FBB = nullptr;
193  MachineInstr *LoopInductionVar = nullptr;
194  MachineInstr *LoopCompare = nullptr;
195  };
196  LoopInfo LI;
197 
  /// Pass identification; handed to the MachineFunctionPass constructor.
198  static char ID;
199  MachinePipeliner() : MachineFunctionPass(ID) {
201  }
202 
  /// Pass entry point; tries to pipeline every loop of the function.
203  bool runOnMachineFunction(MachineFunction &MF) override;
204 
205  void getAnalysisUsage(AnalysisUsage &AU) const override {
212  }
213 
214 private:
  /// Return true if \p L is a candidate for software pipelining.
215  bool canPipelineLoop(MachineLoop &L);
  /// Recurse into the sub-loops of \p L, then try to pipeline \p L itself.
216  bool scheduleLoop(MachineLoop &L);
  /// Build the scheduling DAG for the single-block loop \p L and run SMS on it.
217  bool swingModuloScheduler(MachineLoop &L);
218 };
219 
220 /// This class builds the dependence graph for the instructions in a loop,
221 /// and attempts to schedule the instructions using the SMS algorithm.
///
/// One instance is created per candidate loop by
/// MachinePipeliner::swingModuloScheduler(); schedule() drives the whole
/// algorithm and hasNewSchedule() reports the outcome.
222 class SwingSchedulerDAG : public ScheduleDAGInstrs {
  /// The owning pass; used to reach its analyses and cached loop info.
223  MachinePipeliner &Pass;
224  /// The minimum initiation interval between iterations for this schedule.
225  unsigned MII;
226  /// Set to true if a valid pipelined schedule is found for the loop.
227  bool Scheduled;
228  MachineLoop &Loop;
229  LiveIntervals &LIS;
230  const RegisterClassInfo &RegClassInfo;
231 
232  /// A topological ordering of the SUnits, which is needed for changing
233  /// dependences and iterating over the SUnits.
  /// NOTE(review): the member this comment documents (initialized as `Topo`
  /// in the constructor) is not visible in this listing.
235 
  /// Per-node scheduling bounds, indexed by SUnit::NodeNum.
236  struct NodeInfo {
    // Earliest and latest times the node may be scheduled (see getASAP/getALAP).
237  int ASAP;
238  int ALAP;
239  NodeInfo() : ASAP(0), ALAP(0) {}
240  };
241  /// Computed properties for each node in the graph.
242  std::vector<NodeInfo> ScheduleInfo;
243 
244  enum OrderKind { BottomUp = 0, TopDown = 1 };
245  /// Computed node ordering for scheduling.
247 
248  typedef SmallVector<NodeSet, 8> NodeSetType;
249  typedef DenseMap<unsigned, unsigned> ValueMapTy;
250  typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy;
251  typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy;
252 
253  /// Instructions to change when emitting the final schedule.
  /// NOTE(review): declaration not visible here; getInstrBaseReg() looks it
  /// up by SUnit as `InstrChanges`.
255 
256  /// We may create a new instruction, so remember it because it
257  /// must be deleted when the pass is finished.
  /// NOTE(review): declaration not visible here; finishBlock() iterates it
  /// as `NewMIs`.
259 
260  /// Ordered list of DAG postprocessing steps.
261  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
262 
263  /// Helper class to implement Johnson's circuit finding algorithm.
264  class Circuits {
265  std::vector<SUnit> &SUnits;
267  BitVector Blocked;
    // Running count of paths, cleared by reset(). NOTE(review): presumably
    // bounded by MaxPaths; the code that enforces this is outside this view.
270  unsigned NumPaths;
271  static unsigned MaxPaths;
272 
273  public:
    // All per-node containers are sized to the number of SUnits.
    // NOTE(review): NumPaths is not initialized here, only in reset().
274  Circuits(std::vector<SUnit> &SUs)
275  : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()),
276  AdjK(SUs.size()) {}
277  /// Reset the data structures used in the circuit algorithm.
278  void reset() {
279  Stack.clear();
280  Blocked.reset();
281  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
282  NumPaths = 0;
283  }
284  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
285  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
286  void unblock(int U);
287  };
288 
289 public:
  /// Build an (empty) DAG for loop \p L of pass \p P and collect the target's
  /// SMS-specific DAG mutations from the subtarget.
290  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
291  const RegisterClassInfo &rci)
292  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0),
293  Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci),
294  Topo(SUnits, &ExitSU) {
295  P.MF->getSubtarget().getSMSMutations(Mutations);
296  }
297 
298  void schedule() override;
299  void finishBlock() override;
300 
301  /// Return true if the loop kernel has been scheduled.
302  bool hasNewSchedule() { return Scheduled; }
303 
304  /// Return the earliest time an instruction may be scheduled.
305  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
306 
307  /// Return the latest time an instruction may be scheduled.
308  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
309 
310  /// The mobility function, which is the number of slots in which
311  /// an instruction may be scheduled (ALAP - ASAP).
312  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
313 
314  /// The depth, in the dependence graph, for a node.
315  int getDepth(SUnit *Node) { return Node->getDepth(); }
316 
317  /// The height, in the dependence graph, for a node.
318  int getHeight(SUnit *Node) { return Node->getHeight(); }
319 
320  /// Return true if the dependence is a back-edge in the data dependence graph.
321  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
322  /// using an anti dependence from a Phi to an instruction.
  /// Either endpoint being a Phi qualifies the anti dependence as a back-edge.
323  bool isBackedge(SUnit *Source, const SDep &Dep) {
324  if (Dep.getKind() != SDep::Anti)
325  return false;
326  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
327  }
328 
329  /// Return true if the dependence is an order dependence between non-Phis.
330  static bool isOrder(SUnit *Source, const SDep &Dep) {
331  if (Dep.getKind() != SDep::Order)
332  return false;
333  return (!Source->getInstr()->isPHI() &&
334  !Dep.getSUnit()->getInstr()->isPHI());
335  }
336 
337  bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
338 
339  /// The latency of the dependence.
340  unsigned getLatency(SUnit *Source, const SDep &Dep) {
341  // Anti dependences represent recurrences, so use the latency of the
342  // instruction on the back-edge.
343  if (Dep.getKind() == SDep::Anti) {
      // Whichever endpoint is the Phi, report the latency of the other one.
344  if (Source->getInstr()->isPHI())
345  return Dep.getSUnit()->Latency;
346  if (Dep.getSUnit()->getInstr()->isPHI())
347  return Source->Latency;
348  return Dep.getLatency();
349  }
350  return Dep.getLatency();
351  }
352 
353  /// The distance function, which indicates that operation V of iteration I
354  /// depends on operations U of iteration I-distance.
  /// \p U is not inspected; only \p V and the dependence kind matter.
355  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
356  // Instructions that feed a Phi have a distance of 1. Computing larger
357  // values for arrays requires data dependence information.
358  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
359  return 1;
360  return 0;
361  }
362 
363  /// Set the Minimum Initiation Interval for this schedule attempt.
364  void setMII(unsigned mii) { MII = mii; }
365 
366  MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
367  bool UpdateDAG = false);
368 
369  /// Return the new base register that was stored away for the changed
370  /// instruction, or 0 if no change is recorded for \p SU.
371  unsigned getInstrBaseReg(SUnit *SU) {
373  InstrChanges.find(SU);
374  if (It != InstrChanges.end())
375  return It->second.first;
376  return 0;
377  }
378 
  /// Append a DAG post-processing step; ownership moves into Mutations.
379  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
380  Mutations.push_back(std::move(Mutation));
381  }
382 
383 private:
  // Dependence-graph construction and clean-up (called from schedule()).
384  void addLoopCarriedDependences(AliasAnalysis *AA);
385  void updatePhiDependences();
386  void changeDependences();
  // MII computation and node-set analysis (called from schedule()).
387  unsigned calculateResMII();
388  unsigned calculateRecMII(NodeSetType &RecNodeSets);
389  void findCircuits(NodeSetType &NodeSets);
390  void fuseRecs(NodeSetType &NodeSets);
391  void removeDuplicateNodes(NodeSetType &NodeSets);
392  void computeNodeFunctions(NodeSetType &NodeSets);
393  void registerPressureFilter(NodeSetType &NodeSets);
394  void colocateNodeSets(NodeSetType &NodeSets);
395  void checkNodeSets(NodeSetType &NodeSets);
396  void groupRemainingNodes(NodeSetType &NodeSets);
397  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
398  SetVector<SUnit *> &NodesAdded);
399  void computeNodeOrder(NodeSetType &NodeSets);
  // Scheduling and code generation for the pipelined loop.
400  bool schedulePipeline(SMSchedule &Schedule);
401  void generatePipelinedLoop(SMSchedule &Schedule);
402  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
403  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
404  MBBVectorTy &PrologBBs);
405  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
406  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
407  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
408  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
409  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
410  SMSchedule &Schedule, ValueMapTy *VRMap,
411  InstrMapTy &InstrMap, unsigned LastStageNum,
412  unsigned CurStageNum, bool IsLast);
413  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
414  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
415  SMSchedule &Schedule, ValueMapTy *VRMap,
416  InstrMapTy &InstrMap, unsigned LastStageNum,
417  unsigned CurStageNum, bool IsLast);
418  void removeDeadInstructions(MachineBasicBlock *KernelBB,
419  MBBVectorTy &EpilogBBs);
420  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
421  SMSchedule &Schedule);
422  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
423  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
424  ValueMapTy *VRMap);
425  bool computeDelta(MachineInstr &MI, unsigned &Delta);
426  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
427  unsigned Num);
428  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
429  unsigned InstStageNum);
430  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
431  unsigned InstStageNum,
432  SMSchedule &Schedule);
433  void updateInstruction(MachineInstr *NewMI, bool LastDef,
434  unsigned CurStageNum, unsigned InstStageNum,
435  SMSchedule &Schedule, ValueMapTy *VRMap);
436  MachineInstr *findDefInLoop(unsigned Reg);
437  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
438  unsigned LoopStage, ValueMapTy *VRMap,
439  MachineBasicBlock *BB);
440  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
441  SMSchedule &Schedule, ValueMapTy *VRMap,
442  InstrMapTy &InstrMap);
443  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
444  InstrMapTy &InstrMap, unsigned CurStageNum,
445  unsigned PhiNum, MachineInstr *Phi,
446  unsigned OldReg, unsigned NewReg,
447  unsigned PrevReg = 0);
448  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
449  unsigned &OffsetPos, unsigned &NewBase,
450  int64_t &NewOffset);
451  void postprocessDAG();
452 };
453 
454 /// A NodeSet contains a set of SUnit DAG nodes with additional information
455 /// that assigns a priority to the set.
456 class NodeSet {
457  SetVector<SUnit *> Nodes;
458  bool HasRecurrence;
459  unsigned RecMII = 0;
460  int MaxMOV = 0;
461  int MaxDepth = 0;
462  unsigned Colocate = 0;
463  SUnit *ExceedPressure = nullptr;
464 
465 public:
466  typedef SetVector<SUnit *>::const_iterator iterator;
467 
468  NodeSet() : Nodes(), HasRecurrence(false) {}
469 
470  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
471 
472  bool insert(SUnit *SU) { return Nodes.insert(SU); }
473 
474  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
475 
476  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
477  return Nodes.remove_if(P);
478  }
479 
480  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
481 
482  bool hasRecurrence() { return HasRecurrence; };
483 
484  unsigned size() const { return Nodes.size(); }
485 
486  bool empty() const { return Nodes.empty(); }
487 
488  SUnit *getNode(unsigned i) const { return Nodes[i]; };
489 
490  void setRecMII(unsigned mii) { RecMII = mii; };
491 
492  void setColocate(unsigned c) { Colocate = c; };
493 
494  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
495 
496  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
497 
498  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
499 
500  int getRecMII() { return RecMII; }
501 
502  /// Summarize node functions for the entire node set.
503  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
504  for (SUnit *SU : *this) {
505  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
506  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
507  }
508  }
509 
510  void clear() {
511  Nodes.clear();
512  RecMII = 0;
513  HasRecurrence = false;
514  MaxMOV = 0;
515  MaxDepth = 0;
516  Colocate = 0;
517  ExceedPressure = nullptr;
518  }
519 
520  operator SetVector<SUnit *> &() { return Nodes; }
521 
522  /// Sort the node sets by importance. First, rank them by recurrence MII,
523  /// then by mobility (least mobile done first), and finally by depth.
524  /// Each node set may contain a colocate value which is used as the first
525  /// tie breaker, if it's set.
526  bool operator>(const NodeSet &RHS) const {
527  if (RecMII == RHS.RecMII) {
528  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
529  return Colocate < RHS.Colocate;
530  if (MaxMOV == RHS.MaxMOV)
531  return MaxDepth > RHS.MaxDepth;
532  return MaxMOV < RHS.MaxMOV;
533  }
534  return RecMII > RHS.RecMII;
535  }
536 
537  bool operator==(const NodeSet &RHS) const {
538  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
539  MaxDepth == RHS.MaxDepth;
540  }
541 
542  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
543 
544  iterator begin() { return Nodes.begin(); }
545  iterator end() { return Nodes.end(); }
546 
547  void print(raw_ostream &os) const {
548  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
549  << " depth " << MaxDepth << " col " << Colocate << "\n";
550  for (const auto &I : Nodes)
551  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
552  os << "\n";
553  }
554 
555  void dump() const { print(dbgs()); }
556 };
557 
558 /// This class represents the scheduled code. The main data structure is a
559 /// map from scheduled cycle to instructions. During scheduling, the
560 /// data structure explicitly represents all stages/iterations. When
561 /// the algorithm finishes, the schedule is collapsed into a single stage,
562 /// which represents instructions from different loop iterations.
563 ///
564 /// The SMS algorithm allows negative values for cycles, so the first cycle
565 /// in the schedule is the smallest cycle value.
566 class SMSchedule {
567 private:
568  /// Map from execution cycle to instructions.
569  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
570 
571  /// Map from instruction to execution cycle.
572  std::map<SUnit *, int> InstrToCycle;
573 
574  /// Map for each register and the max difference between its uses and def.
575  /// The first element in the pair is the max difference in stages. The
576  /// second is true if the register defines a Phi value and loop value is
577  /// scheduled before the Phi.
578  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
579 
580  /// Keep track of the first cycle value in the schedule. It starts
581  /// as zero, but the algorithm allows negative values.
582  int FirstCycle;
583 
584  /// Keep track of the last cycle value in the schedule.
585  int LastCycle;
586 
587  /// The initiation interval (II) for the schedule. It is 0 after
  /// construction and after reset(), until setInitiationInterval() is called.
588  int InitiationInterval;
589 
590  /// Target machine information.
591  const TargetSubtargetInfo &ST;
592 
593  /// Virtual register information.
  /// NOTE(review): the member declaration (initialized as `MRI` in the
  /// constructor) is not visible in this listing.
595 
  /// Scheduling state obtained from the target in the constructor; owned by
  /// this object and deleted in the destructor.
596  DFAPacketizer *Resources;
597 
598 public:
  /// Create an empty schedule for function \p mf.
599  SMSchedule(MachineFunction *mf)
600  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
601  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {
602  FirstCycle = 0;
603  LastCycle = 0;
604  InitiationInterval = 0;
605  }
606 
  // NOTE(review): the class owns Resources through a raw pointer but does not
  // delete its copy constructor; copying an SMSchedule would delete the
  // packetizer twice.
607  ~SMSchedule() {
608  ScheduledInstrs.clear();
609  InstrToCycle.clear();
610  RegToStageDiff.clear();
611  delete Resources;
612  }
613 
  /// Forget everything scheduled so far so another attempt can start from
  /// scratch. Resources is left untouched.
614  void reset() {
615  ScheduledInstrs.clear();
616  InstrToCycle.clear();
617  RegToStageDiff.clear();
618  FirstCycle = 0;
619  LastCycle = 0;
620  InitiationInterval = 0;
621  }
622 
623  /// Set the initiation interval for this schedule.
624  void setInitiationInterval(int ii) { InitiationInterval = ii; }
625 
626  /// Return the first cycle in the completed schedule. This
627  /// can be a negative value.
628  int getFirstCycle() const { return FirstCycle; }
629 
630  /// Return the last cycle in the finalized schedule, i.e. the last cycle of
  /// the first stage (FirstCycle + II - 1).
631  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
632 
633  /// Return the cycle of the earliest scheduled instruction in the dependence
634  /// chain.
635  int earliestCycleInChain(const SDep &Dep);
636 
637  /// Return the cycle of the latest scheduled instruction in the dependence
638  /// chain.
639  int latestCycleInChain(const SDep &Dep);
640 
641  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
642  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
643  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
644 
645  /// Iterators for the cycle to instruction map.
646  typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator;
647  typedef DenseMap<int, std::deque<SUnit *>>::const_iterator
648  const_sched_iterator;
649 
650  /// Return true if the instruction is scheduled at the specified stage.
651  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
652  return (stageScheduled(SU) == (int)StageNum);
653  }
654 
655  /// Return the stage for a scheduled instruction. Return -1 if
656  /// the instruction has not been scheduled.
  /// NOTE(review): divides by InitiationInterval, which is 0 until
  /// setInitiationInterval() has been called — presumably callers always set
  /// the II first; confirm.
657  int stageScheduled(SUnit *SU) const {
658  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
659  if (it == InstrToCycle.end())
660  return -1;
661  return (it->second - FirstCycle) / InitiationInterval;
662  }
663 
664  /// Return the cycle for a scheduled instruction. This function normalizes
665  /// the first cycle to be 0, so the result is the offset within the stage.
  /// The instruction must already be scheduled (checked by assert only).
666  unsigned cycleScheduled(SUnit *SU) const {
667  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
668  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
669  return (it->second - FirstCycle) % InitiationInterval;
670  }
671 
672  /// Return the maximum stage count needed for this schedule.
673  unsigned getMaxStageCount() {
674  return (LastCycle - FirstCycle) / InitiationInterval;
675  }
676 
677  /// Return the max. number of stages/iterations that can occur between a
678  /// register definition and its uses.
  /// Looking up a register that has no entry default-inserts {0, false} into
  /// RegToStageDiff (std::map::operator[]).
679  unsigned getStagesForReg(int Reg, unsigned CurStage) {
680  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
681  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
682  return 1;
683  return Stages.first;
684  }
685 
686  /// The number of stages for a Phi is a little different than other
687  /// instructions. The minimum value computed in RegToStageDiff is 1
688  /// because we assume the Phi is needed for at least 1 iteration.
689  /// This is not the case if the loop value is scheduled prior to the
690  /// Phi in the same stage. This function returns the number of stages
691  /// or iterations needed between the Phi definition and any uses.
  /// NOTE(review): if the entry is {0, false} (e.g. freshly default-inserted
  /// by operator[]), `Stages.first - 1` wraps around to UINT_MAX; this relies
  /// on the "minimum is 1" assumption above — confirm it always holds.
692  unsigned getStagesForPhi(int Reg) {
693  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
694  if (Stages.second)
695  return Stages.first;
696  return Stages.first - 1;
697  }
698 
699  /// Return the instructions that are scheduled at the specified cycle.
  /// An empty entry is created if nothing is scheduled at \p cycle yet.
700  std::deque<SUnit *> &getInstructions(int cycle) {
701  return ScheduledInstrs[cycle];
702  }
703 
704  bool isValidSchedule(SwingSchedulerDAG *SSD);
705  void finalizeSchedule(SwingSchedulerDAG *SSD);
706  bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
707  std::deque<SUnit *> &Insts);
708  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
709  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
710  MachineOperand &MO);
711  void print(raw_ostream &os) const;
712  void dump() const;
713 };
714 
715 } // end anonymous namespace
716 
// Out-of-class definitions of the static data members declared above.
// NOTE(review): MaxPaths presumably bounds Circuits::NumPaths during circuit
// search; the code that uses it is outside this view.
717 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
718 char MachinePipeliner::ID = 0;
719 #ifndef NDEBUG
// Counts loops attempted so far; checked against -pipeliner-max in
// scheduleLoop().
720 int MachinePipeliner::NumTries = 0;
721 #endif
// Register the pass under the command-line name "pipeliner".
723 INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner",
724  "Modulo Software Pipelining", false, false)
729 INITIALIZE_PASS_END(MachinePipeliner, "pipeliner",
730  "Modulo Software Pipelining", false, false)
731 
732 /// The "main" function for implementing Swing Modulo Scheduling.
733 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
734  if (skipFunction(*mf.getFunction()))
735  return false;
736 
737  if (!EnableSWP)
738  return false;
739 
740  if (mf.getFunction()->getAttributes().hasAttribute(
741  AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
742  !EnableSWPOptSize.getPosition())
743  return false;
744 
745  MF = &mf;
746  MLI = &getAnalysis<MachineLoopInfo>();
747  MDT = &getAnalysis<MachineDominatorTree>();
748  TII = MF->getSubtarget().getInstrInfo();
749  RegClassInfo.runOnMachineFunction(*MF);
750 
751  for (auto &L : *MLI)
752  scheduleLoop(*L);
753 
754  return false;
755 }
756 
757 /// Attempt to perform the SMS algorithm on the specified loop. This function is
758 /// the main entry point for the algorithm. The function identifies candidate
759 /// loops, calculates the minimum initiation interval, and attempts to schedule
760 /// the loop.
761 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
762  bool Changed = false;
763  for (auto &InnerLoop : L)
764  Changed |= scheduleLoop(*InnerLoop);
765 
766 #ifndef NDEBUG
767  // Stop trying after reaching the limit (if any).
768  int Limit = SwpLoopLimit;
769  if (Limit >= 0) {
770  if (NumTries >= SwpLoopLimit)
771  return Changed;
772  NumTries++;
773  }
774 #endif
775 
776  if (!canPipelineLoop(L))
777  return Changed;
778 
779  ++NumTrytoPipeline;
780 
781  Changed = swingModuloScheduler(L);
782 
783  return Changed;
784 }
785 
786 /// Return true if the loop can be software pipelined. The algorithm is
787 /// restricted to loops with a single basic block. Make sure that the
788 /// branch in the loop can be analyzed.
///
/// As a side effect the cached loop information in LI (branch targets, branch
/// condition, induction variable and compare instruction) is reset and
/// refilled by the target hooks. The loop must also have a preheader, and
/// none of the inspected Phi operands may use a sub-register.
789 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
790  if (L.getNumBlocks() != 1)
791  return false;
792 
793  // Check if the branch can't be understood because we can't do pipelining
794  // if that's the case.
795  LI.TBB = nullptr;
796  LI.FBB = nullptr;
797  LI.BrCond.clear();
798  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
799  return false;
800 
  // Same for the loop structure: a true result from analyzeLoop rejects the
  // loop.
801  LI.LoopInductionVar = nullptr;
802  LI.LoopCompare = nullptr;
803  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
804  return false;
805 
806  if (!L.getLoopPreheader())
807  return false;
808 
809  // If any of the Phis contain subregs, then we can't pipeline
810  // because we don't know how to maintain subreg information in the
811  // VMap structure.
  // NOTE(review): the declaration of MBB is not visible in this listing;
  // presumably it is the loop header block — confirm.
  // Only the odd-numbered operands (1, 3, ...) of each leading Phi are
  // checked.
813  for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
814  BBE = MBB->getFirstNonPHI();
815  BBI != BBE; ++BBI)
816  for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
817  if (BBI->getOperand(i).getSubReg() != 0)
818  return false;
819 
820  return true;
821 }
822 
823 /// The SMS algorithm consists of the following main steps:
824 /// 1. Computation and analysis of the dependence graph.
825 /// 2. Ordering of the nodes (instructions).
826 /// 3. Attempt to Schedule the loop.
///
/// Builds a SwingSchedulerDAG over the loop's single block, excluding the
/// terminators, runs it, and returns whether the DAG reports a new schedule.
827 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
828  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
829 
830  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
831 
832  MachineBasicBlock *MBB = L.getHeader();
833  // The kernel should not include any terminator instructions. These
834  // will be added back later.
835  SMS.startBlock(MBB);
836 
837  // Compute the number of 'real' instructions in the basic block by
838  // ignoring terminators.
  // NOTE(review): the line that opens this counting loop is not visible in
  // this listing; the visible part decrements `size` once per instruction
  // visited up to instr_end().
839  unsigned size = MBB->size();
841  E = MBB->instr_end();
842  I != E; ++I, --size)
843  ;
844 
  // The scheduling region is [begin, first terminator).
845  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
846  SMS.schedule();
847  SMS.exitRegion();
848 
849  SMS.finishBlock();
850  return SMS.hasNewSchedule();
851 }
852 
853 /// We override the schedule function in ScheduleDAGInstrs to implement the
854 /// scheduling part of the Swing Modulo Scheduling algorithm.
855 void SwingSchedulerDAG::schedule() {
856  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
857  buildSchedGraph(AA);
858  addLoopCarriedDependences(AA);
859  updatePhiDependences();
860  Topo.InitDAGTopologicalSorting();
861  postprocessDAG();
862  changeDependences();
863  DEBUG({
864  for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
865  SUnits[su].dumpAll(this);
866  });
867 
868  NodeSetType NodeSets;
869  findCircuits(NodeSets);
870 
871  // Calculate the MII.
872  unsigned ResMII = calculateResMII();
873  unsigned RecMII = calculateRecMII(NodeSets);
874 
875  fuseRecs(NodeSets);
876 
877  // This flag is used for testing and can cause correctness problems.
878  if (SwpIgnoreRecMII)
879  RecMII = 0;
880 
881  MII = std::max(ResMII, RecMII);
882  DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
883  << ")\n");
884 
885  // Can't schedule a loop without a valid MII.
886  if (MII == 0)
887  return;
888 
889  // Don't pipeline large loops.
890  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
891  return;
892 
893  computeNodeFunctions(NodeSets);
894 
895  registerPressureFilter(NodeSets);
896 
897  colocateNodeSets(NodeSets);
898 
899  checkNodeSets(NodeSets);
900 
901  DEBUG({
902  for (auto &I : NodeSets) {
903  dbgs() << " Rec NodeSet ";
904  I.dump();
905  }
906  });
907 
908  std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
909 
910  groupRemainingNodes(NodeSets);
911 
912  removeDuplicateNodes(NodeSets);
913 
914  DEBUG({
915  for (auto &I : NodeSets) {
916  dbgs() << " NodeSet ";
917  I.dump();
918  }
919  });
920 
921  computeNodeOrder(NodeSets);
922 
923  SMSchedule Schedule(Pass.MF);
924  Scheduled = schedulePipeline(Schedule);
925 
926  if (!Scheduled)
927  return;
928 
929  unsigned numStages = Schedule.getMaxStageCount();
930  // No need to generate pipeline if there are no overlapped iterations.
931  if (numStages == 0)
932  return;
933 
934  // Check that the maximum stage count is less than user-defined limit.
935  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
936  return;
937 
938  generatePipelinedLoop(Schedule);
939  ++NumPipelined;
940 }
941 
942 /// Clean up after the software pipeliner runs.
943 void SwingSchedulerDAG::finishBlock() {
944  for (MachineInstr *I : NewMIs)
945  MF.DeleteMachineInstr(I);
946  NewMIs.clear();
947 
948  // Call the superclass.
950 }
951 
952 /// Return the register values for the operands of a Phi instruction.
953 /// This function assume the instruction is a Phi.
955  unsigned &InitVal, unsigned &LoopVal) {
956  assert(Phi.isPHI() && "Expecting a Phi.");
957 
958  InitVal = 0;
959  LoopVal = 0;
960  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
961  if (Phi.getOperand(i + 1).getMBB() != Loop)
962  InitVal = Phi.getOperand(i).getReg();
963  else if (Phi.getOperand(i + 1).getMBB() == Loop)
964  LoopVal = Phi.getOperand(i).getReg();
965 
966  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
967 }
968 
969 /// Return the Phi register value that comes from the incoming block.
970 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
971  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
972  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
973  return Phi.getOperand(i).getReg();
974  return 0;
975 }
976 
977 /// Return the Phi register value that comes the the loop block.
978 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
979  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
980  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
981  return Phi.getOperand(i).getReg();
982  return 0;
983 }
984 
985 /// Return true if SUb can be reached from SUa following the chain edges.
986 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
987  SmallPtrSet<SUnit *, 8> Visited;
988  SmallVector<SUnit *, 8> Worklist;
989  Worklist.push_back(SUa);
990  while (!Worklist.empty()) {
991  const SUnit *SU = Worklist.pop_back_val();
992  for (auto &SI : SU->Succs) {
993  SUnit *SuccSU = SI.getSUnit();
994  if (SI.getKind() == SDep::Order) {
995  if (Visited.count(SuccSU))
996  continue;
997  if (SuccSU == SUb)
998  return true;
999  Worklist.push_back(SuccSU);
1000  Visited.insert(SuccSU);
1001  }
1002  }
1003  }
1004  return false;
1005 }
1006 
1007 /// Return true if the instruction causes a chain between memory
1008 /// references before and after it.
1010  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1011  (MI.hasOrderedMemoryRef() &&
1012  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1013 }
1014 
1015 /// Return the underlying objects for the memory references of an instruction.
1016 /// This function calls the code in ValueTracking, but first checks that the
1017 /// instruction has a memory operand.
1020  const DataLayout &DL) {
1021  if (!MI->hasOneMemOperand())
1022  return;
1023  MachineMemOperand *MM = *MI->memoperands_begin();
1024  if (!MM->getValue())
1025  return;
1026  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1027 }
1028 
1029 /// Add a chain edge between a load and store if the store can be an
1030 /// alias of the load on a subsequent iteration, i.e., a loop carried
1031 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1032 /// but that code doesn't create loop carried dependences.
1033 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1035  for (auto &SU : SUnits) {
1036  MachineInstr &MI = *SU.getInstr();
1037  if (isDependenceBarrier(MI, AA))
1038  PendingLoads.clear();
1039  else if (MI.mayLoad()) {
1041  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1042  for (auto V : Objs) {
1043  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1044  SUs.push_back(&SU);
1045  }
1046  } else if (MI.mayStore()) {
1048  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1049  for (auto V : Objs) {
1051  PendingLoads.find(V);
1052  if (I == PendingLoads.end())
1053  continue;
1054  for (auto Load : I->second) {
1055  if (isSuccOrder(Load, &SU))
1056  continue;
1057  MachineInstr &LdMI = *Load->getInstr();
1058  // First, perform the cheaper check that compares the base register.
1059  // If they are the same and the load offset is less than the store
1060  // offset, then mark the dependence as loop carried potentially.
1061  unsigned BaseReg1, BaseReg2;
1062  int64_t Offset1, Offset2;
1063  if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
1064  !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
1066  continue;
1067  }
1068  if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
1069  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1070  "What happened to the chain edge?");
1072  continue;
1073  }
1074  // Second, the more expensive check that uses alias analysis on the
1075  // base registers. If they alias, and the load offset is less than
1076  // the store offset, the mark the dependence as loop carried.
1077  if (!AA) {
1079  continue;
1080  }
1081  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1082  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1083  if (!MMO1->getValue() || !MMO2->getValue()) {
1085  continue;
1086  }
1087  if (MMO1->getValue() == MMO2->getValue() &&
1088  MMO1->getOffset() <= MMO2->getOffset()) {
1090  continue;
1091  }
1092  AliasResult AAResult = AA->alias(
1094  MMO1->getAAInfo()),
1096  MMO2->getAAInfo()));
1097 
1098  if (AAResult != NoAlias)
1100  }
1101  }
1102  }
1103  }
1104 }
1105 
1106 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1107 /// processes dependences for PHIs. This function adds true dependences
1108 /// from a PHI to a use, and a loop carried dependence from the use to the
1109 /// PHI. The loop carried dependence is represented as an anti dependence
1110 /// edge. This function also removes chain dependences between unrelated
1111 /// PHIs.
1112 void SwingSchedulerDAG::updatePhiDependences() {
1113  SmallVector<SDep, 4> RemoveDeps;
1114  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
1115 
1116  // Iterate over each DAG node.
1117  for (SUnit &I : SUnits) {
1118  RemoveDeps.clear();
1119  // Set to true if the instruction has an operand defined by a Phi.
1120  unsigned HasPhiUse = 0;
1121  unsigned HasPhiDef = 0;
1122  MachineInstr *MI = I.getInstr();
1123  // Iterate over each operand, and we process the definitions.
1124  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1125  MOE = MI->operands_end();
1126  MOI != MOE; ++MOI) {
1127  if (!MOI->isReg())
1128  continue;
1129  unsigned Reg = MOI->getReg();
1130  if (MOI->isDef()) {
1131  // If the register is used by a Phi, then create an anti dependence.
1133  UI = MRI.use_instr_begin(Reg),
1134  UE = MRI.use_instr_end();
1135  UI != UE; ++UI) {
1136  MachineInstr *UseMI = &*UI;
1137  SUnit *SU = getSUnit(UseMI);
1138  if (SU != nullptr && UseMI->isPHI()) {
1139  if (!MI->isPHI()) {
1140  SDep Dep(SU, SDep::Anti, Reg);
1141  I.addPred(Dep);
1142  } else {
1143  HasPhiDef = Reg;
1144  // Add a chain edge to a dependent Phi that isn't an existing
1145  // predecessor.
1146  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1147  I.addPred(SDep(SU, SDep::Barrier));
1148  }
1149  }
1150  }
1151  } else if (MOI->isUse()) {
1152  // If the register is defined by a Phi, then create a true dependence.
1153  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1154  if (DefMI == nullptr)
1155  continue;
1156  SUnit *SU = getSUnit(DefMI);
1157  if (SU != nullptr && DefMI->isPHI()) {
1158  if (!MI->isPHI()) {
1159  SDep Dep(SU, SDep::Data, Reg);
1160  Dep.setLatency(0);
1161  ST.adjustSchedDependency(SU, &I, Dep);
1162  I.addPred(Dep);
1163  } else {
1164  HasPhiUse = Reg;
1165  // Add a chain edge to a dependent Phi that isn't an existing
1166  // predecessor.
1167  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1168  I.addPred(SDep(SU, SDep::Barrier));
1169  }
1170  }
1171  }
1172  }
1173  // Remove order dependences from an unrelated Phi.
1174  if (!SwpPruneDeps)
1175  continue;
1176  for (auto &PI : I.Preds) {
1177  MachineInstr *PMI = PI.getSUnit()->getInstr();
1178  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1179  if (I.getInstr()->isPHI()) {
1180  if (PMI->getOperand(0).getReg() == HasPhiUse)
1181  continue;
1182  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1183  continue;
1184  }
1185  RemoveDeps.push_back(PI);
1186  }
1187  }
1188  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1189  I.removePred(RemoveDeps[i]);
1190  }
1191 }
1192 
1193 /// Iterate over each DAG node and see if we can change any dependences
1194 /// in order to reduce the recurrence MII.
1195 void SwingSchedulerDAG::changeDependences() {
1196  // See if an instruction can use a value from the previous iteration.
1197  // If so, we update the base and offset of the instruction and change
1198  // the dependences.
1199  for (SUnit &I : SUnits) {
1200  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1201  int64_t NewOffset = 0;
1202  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1203  NewOffset))
1204  continue;
1205 
1206  // Get the MI and SUnit for the instruction that defines the original base.
1207  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1208  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1209  if (!DefMI)
1210  continue;
1211  SUnit *DefSU = getSUnit(DefMI);
1212  if (!DefSU)
1213  continue;
1214  // Get the MI and SUnit for the instruction that defins the new base.
1215  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1216  if (!LastMI)
1217  continue;
1218  SUnit *LastSU = getSUnit(LastMI);
1219  if (!LastSU)
1220  continue;
1221 
1222  if (Topo.IsReachable(&I, LastSU))
1223  continue;
1224 
1225  // Remove the dependence. The value now depends on a prior iteration.
1226  SmallVector<SDep, 4> Deps;
1227  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1228  ++P)
1229  if (P->getSUnit() == DefSU)
1230  Deps.push_back(*P);
1231  for (int i = 0, e = Deps.size(); i != e; i++) {
1232  Topo.RemovePred(&I, Deps[i].getSUnit());
1233  I.removePred(Deps[i]);
1234  }
1235  // Remove the chain dependence between the instructions.
1236  Deps.clear();
1237  for (auto &P : LastSU->Preds)
1238  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1239  Deps.push_back(P);
1240  for (int i = 0, e = Deps.size(); i != e; i++) {
1241  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1242  LastSU->removePred(Deps[i]);
1243  }
1244 
1245  // Add a dependence between the new instruction and the instruction
1246  // that defines the new base.
1247  SDep Dep(&I, SDep::Anti, NewBase);
1248  LastSU->addPred(Dep);
1249 
1250  // Remember the base and offset information so that we can update the
1251  // instruction during code generation.
1252  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1253  }
1254 }
1255 
1256 namespace {
1257 
1258 // FuncUnitSorter - Comparison operator used to sort instructions by
1259 // the number of functional unit choices.
1260 struct FuncUnitSorter {
1261  const InstrItineraryData *InstrItins;
1262  DenseMap<unsigned, unsigned> Resources;
1263 
1264  // Compute the number of functional unit alternatives needed
1265  // at each stage, and take the minimum value. We prioritize the
1266  // instructions by the least number of choices first.
1267  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1268  unsigned schedClass = Inst->getDesc().getSchedClass();
1269  unsigned min = UINT_MAX;
1270  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1271  *IE = InstrItins->endStage(schedClass);
1272  IS != IE; ++IS) {
1273  unsigned funcUnits = IS->getUnits();
1274  unsigned numAlternatives = countPopulation(funcUnits);
1275  if (numAlternatives < min) {
1276  min = numAlternatives;
1277  F = funcUnits;
1278  }
1279  }
1280  return min;
1281  }
1282 
1283  // Compute the critical resources needed by the instruction. This
1284  // function records the functional units needed by instructions that
1285  // must use only one functional unit. We use this as a tie breaker
1286  // for computing the resource MII. The instrutions that require
1287  // the same, highly used, functional unit have high priority.
1288  void calcCriticalResources(MachineInstr &MI) {
1289  unsigned SchedClass = MI.getDesc().getSchedClass();
1290  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1291  *IE = InstrItins->endStage(SchedClass);
1292  IS != IE; ++IS) {
1293  unsigned FuncUnits = IS->getUnits();
1294  if (countPopulation(FuncUnits) == 1)
1295  Resources[FuncUnits]++;
1296  }
1297  }
1298 
1299  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1300  /// Return true if IS1 has less priority than IS2.
1301  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1302  unsigned F1 = 0, F2 = 0;
1303  unsigned MFUs1 = minFuncUnits(IS1, F1);
1304  unsigned MFUs2 = minFuncUnits(IS2, F2);
1305  if (MFUs1 == 1 && MFUs2 == 1)
1306  return Resources.lookup(F1) < Resources.lookup(F2);
1307  return MFUs1 > MFUs2;
1308  }
1309 };
1310 
1311 } // end anonymous namespace
1312 
1313 /// Calculate the resource constrained minimum initiation interval for the
1314 /// specified loop. We use the DFA to model the resources needed for
1315 /// each instruction, and we ignore dependences. A different DFA is created
1316 /// for each cycle that is required. When adding a new instruction, we attempt
1317 /// to add it to each existing DFA, until a legal space is found. If the
1318 /// instruction cannot be reserved in an existing DFA, we create a new one.
1319 unsigned SwingSchedulerDAG::calculateResMII() {
1321  MachineBasicBlock *MBB = Loop.getHeader();
1322  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1323 
1324  // Sort the instructions by the number of available choices for scheduling,
1325  // least to most. Use the number of critical resources as the tie breaker.
1326  FuncUnitSorter FUS =
1327  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1329  E = MBB->getFirstTerminator();
1330  I != E; ++I)
1331  FUS.calcCriticalResources(*I);
1333  FuncUnitOrder(FUS);
1334 
1336  E = MBB->getFirstTerminator();
1337  I != E; ++I)
1338  FuncUnitOrder.push(&*I);
1339 
1340  while (!FuncUnitOrder.empty()) {
1341  MachineInstr *MI = FuncUnitOrder.top();
1342  FuncUnitOrder.pop();
1343  if (TII->isZeroCost(MI->getOpcode()))
1344  continue;
1345  // Attempt to reserve the instruction in an existing DFA. At least one
1346  // DFA is needed for each cycle.
1347  unsigned NumCycles = getSUnit(MI)->Latency;
1348  unsigned ReservedCycles = 0;
1351  for (unsigned C = 0; C < NumCycles; ++C)
1352  while (RI != RE) {
1353  if ((*RI++)->canReserveResources(*MI)) {
1354  ++ReservedCycles;
1355  break;
1356  }
1357  }
1358  // Start reserving resources using existing DFAs.
1359  for (unsigned C = 0; C < ReservedCycles; ++C) {
1360  --RI;
1361  (*RI)->reserveResources(*MI);
1362  }
1363  // Add new DFAs, if needed, to reserve resources.
1364  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1365  DFAPacketizer *NewResource =
1366  TII->CreateTargetScheduleState(MF.getSubtarget());
1367  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1368  NewResource->reserveResources(*MI);
1369  Resources.push_back(NewResource);
1370  }
1371  }
1372  int Resmii = Resources.size();
1373  // Delete the memory for each of the DFAs that were created earlier.
1374  for (DFAPacketizer *RI : Resources) {
1375  DFAPacketizer *D = RI;
1376  delete D;
1377  }
1378  Resources.clear();
1379  return Resmii;
1380 }
1381 
1382 /// Calculate the recurrence-constrainted minimum initiation interval.
1383 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1384 /// for each circuit. The II needs to satisfy the inequality
1385 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1386 /// II that satistifies the inequality, and the RecMII is the maximum
1387 /// of those values.
1388 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1389  unsigned RecMII = 0;
1390 
1391  for (NodeSet &Nodes : NodeSets) {
1392  if (Nodes.size() == 0)
1393  continue;
1394 
1395  unsigned Delay = Nodes.size() - 1;
1396  unsigned Distance = 1;
1397 
1398  // ii = ceil(delay / distance)
1399  unsigned CurMII = (Delay + Distance - 1) / Distance;
1400  Nodes.setRecMII(CurMII);
1401  if (CurMII > RecMII)
1402  RecMII = CurMII;
1403  }
1404 
1405  return RecMII;
1406 }
1407 
1408 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1409 /// but we do this to find the circuits, and then change them back.
1410 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1412  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1413  SUnit *SU = &SUnits[i];
1414  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1415  IP != EP; ++IP) {
1416  if (IP->getKind() != SDep::Anti)
1417  continue;
1418  DepsAdded.push_back(std::make_pair(SU, *IP));
1419  }
1420  }
1421  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1422  E = DepsAdded.end();
1423  I != E; ++I) {
1424  // Remove this anti dependency and add one in the reverse direction.
1425  SUnit *SU = I->first;
1426  SDep &D = I->second;
1427  SUnit *TargetSU = D.getSUnit();
1428  unsigned Reg = D.getReg();
1429  unsigned Lat = D.getLatency();
1430  SU->removePred(D);
1431  SDep Dep(SU, SDep::Anti, Reg);
1432  Dep.setLatency(Lat);
1433  TargetSU->addPred(Dep);
1434  }
1435 }
1436 
1437 /// Create the adjacency structure of the nodes in the graph.
1438 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1439  SwingSchedulerDAG *DAG) {
1440  BitVector Added(SUnits.size());
1441  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1442  Added.reset();
1443  // Add any successor to the adjacency matrix and exclude duplicates.
1444  for (auto &SI : SUnits[i].Succs) {
1445  // Do not process a boundary node and a back-edge is processed only
1446  // if it goes to a Phi.
1447  if (SI.getSUnit()->isBoundaryNode() ||
1448  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1449  continue;
1450  int N = SI.getSUnit()->NodeNum;
1451  if (!Added.test(N)) {
1452  AdjK[i].push_back(N);
1453  Added.set(N);
1454  }
1455  }
1456  // A chain edge between a store and a load is treated as a back-edge in the
1457  // adjacency matrix.
1458  for (auto &PI : SUnits[i].Preds) {
1459  if (!SUnits[i].getInstr()->mayStore() ||
1460  !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
1461  continue;
1462  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1463  int N = PI.getSUnit()->NodeNum;
1464  if (!Added.test(N)) {
1465  AdjK[i].push_back(N);
1466  Added.set(N);
1467  }
1468  }
1469  }
1470  }
1471 }
1472 
1473 /// Identify an elementary circuit in the dependence graph starting at the
1474 /// specified node.
1475 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1476  bool HasBackedge) {
1477  SUnit *SV = &SUnits[V];
1478  bool F = false;
1479  Stack.insert(SV);
1480  Blocked.set(V);
1481 
1482  for (auto W : AdjK[V]) {
1483  if (NumPaths > MaxPaths)
1484  break;
1485  if (W < S)
1486  continue;
1487  if (W == S) {
1488  if (!HasBackedge)
1489  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1490  F = true;
1491  ++NumPaths;
1492  break;
1493  } else if (!Blocked.test(W)) {
1494  if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
1495  F = true;
1496  }
1497  }
1498 
1499  if (F)
1500  unblock(V);
1501  else {
1502  for (auto W : AdjK[V]) {
1503  if (W < S)
1504  continue;
1505  if (B[W].count(SV) == 0)
1506  B[W].insert(SV);
1507  }
1508  }
1509  Stack.pop_back();
1510  return F;
1511 }
1512 
1513 /// Unblock a node in the circuit finding algorithm.
1514 void SwingSchedulerDAG::Circuits::unblock(int U) {
1515  Blocked.reset(U);
1516  SmallPtrSet<SUnit *, 4> &BU = B[U];
1517  while (!BU.empty()) {
1519  assert(SI != BU.end() && "Invalid B set.");
1520  SUnit *W = *SI;
1521  BU.erase(W);
1522  if (Blocked.test(W->NodeNum))
1523  unblock(W->NodeNum);
1524  }
1525 }
1526 
1527 /// Identify all the elementary circuits in the dependence graph using
1528 /// Johnson's circuit algorithm.
1529 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1530  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1531  // but we do this to find the circuits, and then change them back.
1532  swapAntiDependences(SUnits);
1533 
1534  Circuits Cir(SUnits);
1535  // Create the adjacency structure.
1536  Cir.createAdjacencyStructure(this);
1537  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1538  Cir.reset();
1539  Cir.circuit(i, i, NodeSets);
1540  }
1541 
1542  // Change the dependences back so that we've created a DAG again.
1543  swapAntiDependences(SUnits);
1544 }
1545 
1546 /// Return true for DAG nodes that we ignore when computing the cost functions.
1547 /// We ignore the back-edge recurrence in order to avoid unbounded recurison
1548 /// in the calculation of the ASAP, ALAP, etc functions.
1549 static bool ignoreDependence(const SDep &D, bool isPred) {
1550  if (D.isArtificial())
1551  return true;
1552  return D.getKind() == SDep::Anti && isPred;
1553 }
1554 
1555 /// Compute several functions need to order the nodes for scheduling.
1556 /// ASAP - Earliest time to schedule a node.
1557 /// ALAP - Latest time to schedule a node.
1558 /// MOV - Mobility function, difference between ALAP and ASAP.
1559 /// D - Depth of each node.
1560 /// H - Height of each node.
1561 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1562 
1563  ScheduleInfo.resize(SUnits.size());
1564 
1565  DEBUG({
1567  E = Topo.end();
1568  I != E; ++I) {
1569  SUnit *SU = &SUnits[*I];
1570  SU->dump(this);
1571  }
1572  });
1573 
1574  int maxASAP = 0;
1575  // Compute ASAP.
1577  E = Topo.end();
1578  I != E; ++I) {
1579  int asap = 0;
1580  SUnit *SU = &SUnits[*I];
1581  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1582  EP = SU->Preds.end();
1583  IP != EP; ++IP) {
1584  if (ignoreDependence(*IP, true))
1585  continue;
1586  SUnit *pred = IP->getSUnit();
1587  asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
1588  getDistance(pred, SU, *IP) * MII));
1589  }
1590  maxASAP = std::max(maxASAP, asap);
1591  ScheduleInfo[*I].ASAP = asap;
1592  }
1593 
1594  // Compute ALAP and MOV.
1596  E = Topo.rend();
1597  I != E; ++I) {
1598  int alap = maxASAP;
1599  SUnit *SU = &SUnits[*I];
1600  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1601  ES = SU->Succs.end();
1602  IS != ES; ++IS) {
1603  if (ignoreDependence(*IS, true))
1604  continue;
1605  SUnit *succ = IS->getSUnit();
1606  alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
1607  getDistance(SU, succ, *IS) * MII));
1608  }
1609 
1610  ScheduleInfo[*I].ALAP = alap;
1611  }
1612 
1613  // After computing the node functions, compute the summary for each node set.
1614  for (NodeSet &I : NodeSets)
1615  I.computeNodeSetInfo(this);
1616 
1617  DEBUG({
1618  for (unsigned i = 0; i < SUnits.size(); i++) {
1619  dbgs() << "\tNode " << i << ":\n";
1620  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1621  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1622  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1623  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1624  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1625  }
1626  });
1627 }
1628 
1629 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1630 /// as the predecessors of the elements of NodeOrder that are not also in
1631 /// NodeOrder.
1634  const NodeSet *S = nullptr) {
1635  Preds.clear();
1636  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1637  I != E; ++I) {
1638  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1639  PI != PE; ++PI) {
1640  if (S && S->count(PI->getSUnit()) == 0)
1641  continue;
1642  if (ignoreDependence(*PI, true))
1643  continue;
1644  if (NodeOrder.count(PI->getSUnit()) == 0)
1645  Preds.insert(PI->getSUnit());
1646  }
1647  // Back-edges are predecessors with an anti-dependence.
1648  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1649  ES = (*I)->Succs.end();
1650  IS != ES; ++IS) {
1651  if (IS->getKind() != SDep::Anti)
1652  continue;
1653  if (S && S->count(IS->getSUnit()) == 0)
1654  continue;
1655  if (NodeOrder.count(IS->getSUnit()) == 0)
1656  Preds.insert(IS->getSUnit());
1657  }
1658  }
1659  return Preds.size() > 0;
1660 }
1661 
1662 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1663 /// as the successors of the elements of NodeOrder that are not also in
1664 /// NodeOrder.
1667  const NodeSet *S = nullptr) {
1668  Succs.clear();
1669  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1670  I != E; ++I) {
1671  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1672  SI != SE; ++SI) {
1673  if (S && S->count(SI->getSUnit()) == 0)
1674  continue;
1675  if (ignoreDependence(*SI, false))
1676  continue;
1677  if (NodeOrder.count(SI->getSUnit()) == 0)
1678  Succs.insert(SI->getSUnit());
1679  }
1680  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1681  PE = (*I)->Preds.end();
1682  PI != PE; ++PI) {
1683  if (PI->getKind() != SDep::Anti)
1684  continue;
1685  if (S && S->count(PI->getSUnit()) == 0)
1686  continue;
1687  if (NodeOrder.count(PI->getSUnit()) == 0)
1688  Succs.insert(PI->getSUnit());
1689  }
1690  }
1691  return Succs.size() > 0;
1692 }
1693 
1694 /// Return true if there is a path from the specified node to any of the nodes
1695 /// in DestNodes. Keep track and return the nodes in any path.
1696 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1697  SetVector<SUnit *> &DestNodes,
1698  SetVector<SUnit *> &Exclude,
1699  SmallPtrSet<SUnit *, 8> &Visited) {
1700  if (Cur->isBoundaryNode())
1701  return false;
1702  if (Exclude.count(Cur) != 0)
1703  return false;
1704  if (DestNodes.count(Cur) != 0)
1705  return true;
1706  if (!Visited.insert(Cur).second)
1707  return Path.count(Cur) != 0;
1708  bool FoundPath = false;
1709  for (auto &SI : Cur->Succs)
1710  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1711  for (auto &PI : Cur->Preds)
1712  if (PI.getKind() == SDep::Anti)
1713  FoundPath |=
1714  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1715  if (FoundPath)
1716  Path.insert(Cur);
1717  return FoundPath;
1718 }
1719 
/// Return true if every element of Set1 is also contained in Set2. An empty
/// Set1 is a subset of any Set2.
template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
  for (const auto &Elem : Set1) {
    if (Set2.count(Elem) != 0)
      continue;
    return false;
  }
  return true;
}
1727 
1728 /// Compute the live-out registers for the instructions in a node-set.
1729 /// The live-out registers are those that are defined in the node-set,
1730 /// but not used. Except for use operands of Phis.
1732  NodeSet &NS) {
1733  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1736  SmallSet<unsigned, 4> Uses;
1737  for (SUnit *SU : NS) {
1738  const MachineInstr *MI = SU->getInstr();
1739  if (MI->isPHI())
1740  continue;
1741  for (const MachineOperand &MO : MI->operands())
1742  if (MO.isReg() && MO.isUse()) {
1743  unsigned Reg = MO.getReg();
1745  Uses.insert(Reg);
1746  else if (MRI.isAllocatable(Reg))
1747  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1748  Uses.insert(*Units);
1749  }
1750  }
1751  for (SUnit *SU : NS)
1752  for (const MachineOperand &MO : SU->getInstr()->operands())
1753  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1754  unsigned Reg = MO.getReg();
1756  if (!Uses.count(Reg))
1757  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1759  } else if (MRI.isAllocatable(Reg)) {
1760  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1761  if (!Uses.count(*Units))
1762  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1764  }
1765  }
1766  RPTracker.addLiveRegs(LiveOutRegs);
1767 }
1768 
1769 /// A heuristic to filter nodes in recurrent node-sets if the register
1770 /// pressure of a set is too high.
1771 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1772  for (auto &NS : NodeSets) {
1773  // Skip small node-sets since they won't cause register pressure problems.
1774  if (NS.size() <= 2)
1775  continue;
1776  IntervalPressure RecRegPressure;
1777  RegPressureTracker RecRPTracker(RecRegPressure);
1778  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1779  computeLiveOuts(MF, RecRPTracker, NS);
1780  RecRPTracker.closeBottom();
1781 
1782  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1783  std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
1784  return A->NodeNum > B->NodeNum;
1785  });
1786 
1787  for (auto &SU : SUnits) {
1788  // Since we're computing the register pressure for a subset of the
1789  // instructions in a block, we need to set the tracker for each
1790  // instruction in the node-set. The tracker is set to the instruction
1791  // just after the one we're interested in.
1793  RecRPTracker.setPos(std::next(CurInstI));
1794 
1795  RegPressureDelta RPDelta;
1796  ArrayRef<PressureChange> CriticalPSets;
1797  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1798  CriticalPSets,
1799  RecRegPressure.MaxSetPressure);
1800  if (RPDelta.Excess.isValid()) {
1801  DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1802  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1803  << ":" << RPDelta.Excess.getUnitInc());
1804  NS.setExceedPressure(SU);
1805  break;
1806  }
1807  RecRPTracker.recede();
1808  }
1809  }
1810 }
1811 
1812 /// A heuristic to colocate node sets that have the same set of
1813 /// successors.
1814 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1815  unsigned Colocate = 0;
1816  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1817  NodeSet &N1 = NodeSets[i];
1819  if (N1.empty() || !succ_L(N1, S1))
1820  continue;
1821  for (int j = i + 1; j < e; ++j) {
1822  NodeSet &N2 = NodeSets[j];
1823  if (N1.compareRecMII(N2) != 0)
1824  continue;
1826  if (N2.empty() || !succ_L(N2, S2))
1827  continue;
1828  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1829  N1.setColocate(++Colocate);
1830  N2.setColocate(Colocate);
1831  break;
1832  }
1833  }
1834  }
1835 }
1836 
1837 /// Check if the existing node-sets are profitable. If not, then ignore the
1838 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1839 /// a heuristic. If the MII is large and there is a non-recurrent node with
1840 /// a large depth compared to the MII, then it's best to try and schedule
1841 /// all instruction together instead of starting with the recurrent node-sets.
1842 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1843  // Look for loops with a large MII.
1844  if (MII <= 20)
1845  return;
1846  // Check if the node-set contains only a simple add recurrence.
1847  for (auto &NS : NodeSets)
1848  if (NS.size() > 2)
1849  return;
1850  // If the depth of any instruction is significantly larger than the MII, then
1851  // ignore the recurrent node-sets and treat all instructions equally.
1852  for (auto &SU : SUnits)
1853  if (SU.getDepth() > MII * 1.5) {
1854  NodeSets.clear();
1855  DEBUG(dbgs() << "Clear recurrence node-sets\n");
1856  return;
1857  }
1858 }
1859 
1860 /// Add the nodes that do not belong to a recurrence set into groups
1861 /// based upon connected componenets.
1862 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1863  SetVector<SUnit *> NodesAdded;
1864  SmallPtrSet<SUnit *, 8> Visited;
1865  // Add the nodes that are on a path between the previous node sets and
1866  // the current node set.
1867  for (NodeSet &I : NodeSets) {
1869  // Add the nodes from the current node set to the previous node set.
1870  if (succ_L(I, N)) {
1871  SetVector<SUnit *> Path;
1872  for (SUnit *NI : N) {
1873  Visited.clear();
1874  computePath(NI, Path, NodesAdded, I, Visited);
1875  }
1876  if (Path.size() > 0)
1877  I.insert(Path.begin(), Path.end());
1878  }
1879  // Add the nodes from the previous node set to the current node set.
1880  N.clear();
1881  if (succ_L(NodesAdded, N)) {
1882  SetVector<SUnit *> Path;
1883  for (SUnit *NI : N) {
1884  Visited.clear();
1885  computePath(NI, Path, I, NodesAdded, Visited);
1886  }
1887  if (Path.size() > 0)
1888  I.insert(Path.begin(), Path.end());
1889  }
1890  NodesAdded.insert(I.begin(), I.end());
1891  }
1892 
1893  // Create a new node set with the connected nodes of any successor of a node
1894  // in a recurrent set.
1895  NodeSet NewSet;
1897  if (succ_L(NodesAdded, N))
1898  for (SUnit *I : N)
1899  addConnectedNodes(I, NewSet, NodesAdded);
1900  if (NewSet.size() > 0)
1901  NodeSets.push_back(NewSet);
1902 
1903  // Create a new node set with the connected nodes of any predecessor of a node
1904  // in a recurrent set.
1905  NewSet.clear();
1906  if (pred_L(NodesAdded, N))
1907  for (SUnit *I : N)
1908  addConnectedNodes(I, NewSet, NodesAdded);
1909  if (NewSet.size() > 0)
1910  NodeSets.push_back(NewSet);
1911 
1912  // Create new nodes sets with the connected nodes any any remaining node that
1913  // has no predecessor.
1914  for (unsigned i = 0; i < SUnits.size(); ++i) {
1915  SUnit *SU = &SUnits[i];
1916  if (NodesAdded.count(SU) == 0) {
1917  NewSet.clear();
1918  addConnectedNodes(SU, NewSet, NodesAdded);
1919  if (NewSet.size() > 0)
1920  NodeSets.push_back(NewSet);
1921  }
1922  }
1923 }
1924 
1925 /// Add the node to the set, and add all is its connected nodes to the set.
1926 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1927  SetVector<SUnit *> &NodesAdded) {
1928  NewSet.insert(SU);
1929  NodesAdded.insert(SU);
1930  for (auto &SI : SU->Succs) {
1931  SUnit *Successor = SI.getSUnit();
1932  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1933  addConnectedNodes(Successor, NewSet, NodesAdded);
1934  }
1935  for (auto &PI : SU->Preds) {
1936  SUnit *Predecessor = PI.getSUnit();
1937  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1938  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1939  }
1940 }
1941 
1942 /// Return true if Set1 contains elements in Set2. The elements in common
1943 /// are returned in a different container.
1944 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1945  SmallSetVector<SUnit *, 8> &Result) {
1946  Result.clear();
1947  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1948  SUnit *SU = Set1[i];
1949  if (Set2.count(SU) != 0)
1950  Result.insert(SU);
1951  }
1952  return !Result.empty();
1953 }
1954 
1955 /// Merge the recurrence node sets that have the same initial node.
1956 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1957  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1958  ++I) {
1959  NodeSet &NI = *I;
1960  for (NodeSetType::iterator J = I + 1; J != E;) {
1961  NodeSet &NJ = *J;
1962  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1963  if (NJ.compareRecMII(NI) > 0)
1964  NI.setRecMII(NJ.getRecMII());
1965  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1966  ++NII)
1967  I->insert(*NII);
1968  NodeSets.erase(J);
1969  E = NodeSets.end();
1970  } else {
1971  ++J;
1972  }
1973  }
1974  }
1975 }
1976 
1977 /// Remove nodes that have been scheduled in previous NodeSets.
1978 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1979  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1980  ++I)
1981  for (NodeSetType::iterator J = I + 1; J != E;) {
1982  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1983 
1984  if (J->size() == 0) {
1985  NodeSets.erase(J);
1986  E = NodeSets.end();
1987  } else {
1988  ++J;
1989  }
1990  }
1991 }
1992 
1993 /// Return true if Inst1 defines a value that is used in Inst2.
1994 static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
1995  for (auto &SI : Inst1->Succs)
1996  if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
1997  return true;
1998  return false;
1999 }
2000 
2001 /// Compute an ordered list of the dependence graph nodes, which
2002 /// indicates the order that the nodes will be scheduled. This is a
2003 /// two-level algorithm. First, a partial order is created, which
2004 /// consists of a list of sets ordered from highest to lowest priority.
2005 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2007  NodeOrder.clear();
2008 
2009  for (auto &Nodes : NodeSets) {
2010  DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2011  OrderKind Order;
2013  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2014  R.insert(N.begin(), N.end());
2015  Order = BottomUp;
2016  DEBUG(dbgs() << " Bottom up (preds) ");
2017  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2018  R.insert(N.begin(), N.end());
2019  Order = TopDown;
2020  DEBUG(dbgs() << " Top down (succs) ");
2021  } else if (isIntersect(N, Nodes, R)) {
2022  // If some of the successors are in the existing node-set, then use the
2023  // top-down ordering.
2024  Order = TopDown;
2025  DEBUG(dbgs() << " Top down (intersect) ");
2026  } else if (NodeSets.size() == 1) {
2027  for (auto &N : Nodes)
2028  if (N->Succs.size() == 0)
2029  R.insert(N);
2030  Order = BottomUp;
2031  DEBUG(dbgs() << " Bottom up (all) ");
2032  } else {
2033  // Find the node with the highest ASAP.
2034  SUnit *maxASAP = nullptr;
2035  for (SUnit *SU : Nodes) {
2036  if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
2037  maxASAP = SU;
2038  }
2039  R.insert(maxASAP);
2040  Order = BottomUp;
2041  DEBUG(dbgs() << " Bottom up (default) ");
2042  }
2043 
2044  while (!R.empty()) {
2045  if (Order == TopDown) {
2046  // Choose the node with the maximum height. If more than one, choose
2047  // the node with the lowest MOV. If still more than one, check if there
2048  // is a dependence between the instructions.
2049  while (!R.empty()) {
2050  SUnit *maxHeight = nullptr;
2051  for (SUnit *I : R) {
2052  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2053  maxHeight = I;
2054  else if (getHeight(I) == getHeight(maxHeight) &&
2055  getMOV(I) < getMOV(maxHeight) &&
2056  !hasDataDependence(maxHeight, I))
2057  maxHeight = I;
2058  else if (hasDataDependence(I, maxHeight))
2059  maxHeight = I;
2060  }
2061  NodeOrder.insert(maxHeight);
2062  DEBUG(dbgs() << maxHeight->NodeNum << " ");
2063  R.remove(maxHeight);
2064  for (const auto &I : maxHeight->Succs) {
2065  if (Nodes.count(I.getSUnit()) == 0)
2066  continue;
2067  if (NodeOrder.count(I.getSUnit()) != 0)
2068  continue;
2069  if (ignoreDependence(I, false))
2070  continue;
2071  R.insert(I.getSUnit());
2072  }
2073  // Back-edges are predecessors with an anti-dependence.
2074  for (const auto &I : maxHeight->Preds) {
2075  if (I.getKind() != SDep::Anti)
2076  continue;
2077  if (Nodes.count(I.getSUnit()) == 0)
2078  continue;
2079  if (NodeOrder.count(I.getSUnit()) != 0)
2080  continue;
2081  R.insert(I.getSUnit());
2082  }
2083  }
2084  Order = BottomUp;
2085  DEBUG(dbgs() << "\n Switching order to bottom up ");
2087  if (pred_L(NodeOrder, N, &Nodes))
2088  R.insert(N.begin(), N.end());
2089  } else {
2090  // Choose the node with the maximum depth. If more than one, choose
2091  // the node with the lowest MOV. If there is still more than one, check
2092  // for a dependence between the instructions.
2093  while (!R.empty()) {
2094  SUnit *maxDepth = nullptr;
2095  for (SUnit *I : R) {
2096  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2097  maxDepth = I;
2098  else if (getDepth(I) == getDepth(maxDepth) &&
2099  getMOV(I) < getMOV(maxDepth) &&
2100  !hasDataDependence(I, maxDepth))
2101  maxDepth = I;
2102  else if (hasDataDependence(maxDepth, I))
2103  maxDepth = I;
2104  }
2105  NodeOrder.insert(maxDepth);
2106  DEBUG(dbgs() << maxDepth->NodeNum << " ");
2107  R.remove(maxDepth);
2108  if (Nodes.isExceedSU(maxDepth)) {
2109  Order = TopDown;
2110  R.clear();
2111  R.insert(Nodes.getNode(0));
2112  break;
2113  }
2114  for (const auto &I : maxDepth->Preds) {
2115  if (Nodes.count(I.getSUnit()) == 0)
2116  continue;
2117  if (NodeOrder.count(I.getSUnit()) != 0)
2118  continue;
2119  if (I.getKind() == SDep::Anti)
2120  continue;
2121  R.insert(I.getSUnit());
2122  }
2123  // Back-edges are predecessors with an anti-dependence.
2124  for (const auto &I : maxDepth->Succs) {
2125  if (I.getKind() != SDep::Anti)
2126  continue;
2127  if (Nodes.count(I.getSUnit()) == 0)
2128  continue;
2129  if (NodeOrder.count(I.getSUnit()) != 0)
2130  continue;
2131  R.insert(I.getSUnit());
2132  }
2133  }
2134  Order = TopDown;
2135  DEBUG(dbgs() << "\n Switching order to top down ");
2137  if (succ_L(NodeOrder, N, &Nodes))
2138  R.insert(N.begin(), N.end());
2139  }
2140  }
2141  DEBUG(dbgs() << "\nDone with Nodeset\n");
2142  }
2143 
2144  DEBUG({
2145  dbgs() << "Node order: ";
2146  for (SUnit *I : NodeOrder)
2147  dbgs() << " " << I->NodeNum << " ";
2148  dbgs() << "\n";
2149  });
2150 }
2151 
2152 /// Process the nodes in the computed order and create the pipelined schedule
2153 /// of the instructions, if possible. Return true if a schedule is found.
2154 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2155 
2156  if (NodeOrder.size() == 0)
2157  return false;
2158 
2159  bool scheduleFound = false;
2160  // Keep increasing II until a valid schedule is found.
2161  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2162  Schedule.reset();
2163  Schedule.setInitiationInterval(II);
2164  DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2165 
2168  do {
2169  SUnit *SU = *NI;
2170 
2171  // Compute the schedule time for the instruction, which is based
2172  // upon the scheduled time for any predecessors/successors.
2173  int EarlyStart = INT_MIN;
2174  int LateStart = INT_MAX;
2175  // These values are set when the size of the schedule window is limited
2176  // due to chain dependences.
2177  int SchedEnd = INT_MAX;
2178  int SchedStart = INT_MIN;
2179  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2180  II, this);
2181  DEBUG({
2182  dbgs() << "Inst (" << SU->NodeNum << ") ";
2183  SU->getInstr()->dump();
2184  dbgs() << "\n";
2185  });
2186  DEBUG({
2187  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2188  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2189  });
2190 
2191  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2192  SchedStart > LateStart)
2193  scheduleFound = false;
2194  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2195  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2196  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2197  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2198  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2199  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2200  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2201  SchedEnd =
2202  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2203  // When scheduling a Phi it is better to start at the late cycle and go
2204  // backwards. The default order may insert the Phi too far away from
2205  // its first dependence.
2206  if (SU->getInstr()->isPHI())
2207  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2208  else
2209  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2210  } else {
2211  int FirstCycle = Schedule.getFirstCycle();
2212  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2213  FirstCycle + getASAP(SU) + II - 1, II);
2214  }
2215  // Even if we find a schedule, make sure the schedule doesn't exceed the
2216  // allowable number of stages. We keep trying if this happens.
2217  if (scheduleFound)
2218  if (SwpMaxStages > -1 &&
2219  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2220  scheduleFound = false;
2221 
2222  DEBUG({
2223  if (!scheduleFound)
2224  dbgs() << "\tCan't schedule\n";
2225  });
2226  } while (++NI != NE && scheduleFound);
2227 
2228  // If a schedule is found, check if it is a valid schedule too.
2229  if (scheduleFound)
2230  scheduleFound = Schedule.isValidSchedule(this);
2231  }
2232 
2233  DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2234 
2235  if (scheduleFound)
2236  Schedule.finalizeSchedule(this);
2237  else
2238  Schedule.reset();
2239 
2240  return scheduleFound && Schedule.getMaxStageCount() > 0;
2241 }
2242 
2243 /// Given a schedule for the loop, generate a new version of the loop,
2244 /// and replace the old version. This function generates a prolog
2245 /// that contains the initial iterations in the pipeline, and kernel
2246 /// loop, and the epilogue that contains the code for the final
2247 /// iterations.
2248 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2249  // Create a new basic block for the kernel and add it to the CFG.
2250  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2251 
2252  unsigned MaxStageCount = Schedule.getMaxStageCount();
2253 
2254  // Remember the registers that are used in different stages. The index is
2255  // the iteration, or stage, that the instruction is scheduled in. This is
2256  // a map between register names in the orignal block and the names created
2257  // in each stage of the pipelined loop.
2258  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2259  InstrMapTy InstrMap;
2260 
2262  // Generate the prolog instructions that set up the pipeline.
2263  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2264  MF.insert(BB->getIterator(), KernelBB);
2265 
2266  // Rearrange the instructions to generate the new, pipelined loop,
2267  // and update register names as needed.
2268  for (int Cycle = Schedule.getFirstCycle(),
2269  LastCycle = Schedule.getFinalCycle();
2270  Cycle <= LastCycle; ++Cycle) {
2271  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2272  // This inner loop schedules each instruction in the cycle.
2273  for (SUnit *CI : CycleInstrs) {
2274  if (CI->getInstr()->isPHI())
2275  continue;
2276  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2277  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2278  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2279  KernelBB->push_back(NewMI);
2280  InstrMap[NewMI] = CI->getInstr();
2281  }
2282  }
2283 
2284  // Copy any terminator instructions to the new kernel, and update
2285  // names as needed.
2286  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2287  E = BB->instr_end();
2288  I != E; ++I) {
2289  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2290  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2291  KernelBB->push_back(NewMI);
2292  InstrMap[NewMI] = &*I;
2293  }
2294 
2295  KernelBB->transferSuccessors(BB);
2296  KernelBB->replaceSuccessor(BB, KernelBB);
2297 
2298  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2299  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2300  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2301  InstrMap, MaxStageCount, MaxStageCount, false);
2302 
2303  DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2304 
2306  // Generate the epilog instructions to complete the pipeline.
2307  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2308  PrologBBs);
2309 
2310  // We need this step because the register allocation doesn't handle some
2311  // situations well, so we insert copies to help out.
2312  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2313 
2314  // Remove dead instructions due to loop induction variables.
2315  removeDeadInstructions(KernelBB, EpilogBBs);
2316 
2317  // Add branches between prolog and epilog blocks.
2318  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2319 
2320  // Remove the original loop since it's no longer referenced.
2321  BB->clear();
2322  BB->eraseFromParent();
2323 
2324  delete[] VRMap;
2325 }
2326 
2327 /// Generate the pipeline prolog code.
2328 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2329  MachineBasicBlock *KernelBB,
2330  ValueMapTy *VRMap,
2331  MBBVectorTy &PrologBBs) {
2332  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2333  assert(PreheaderBB != NULL &&
2334  "Need to add code to handle loops w/o preheader");
2335  MachineBasicBlock *PredBB = PreheaderBB;
2336  InstrMapTy InstrMap;
2337 
2338  // Generate a basic block for each stage, not including the last stage,
2339  // which will be generated in the kernel. Each basic block may contain
2340  // instructions from multiple stages/iterations.
2341  for (unsigned i = 0; i < LastStage; ++i) {
2342  // Create and insert the prolog basic block prior to the original loop
2343  // basic block. The original loop is removed later.
2344  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2345  PrologBBs.push_back(NewBB);
2346  MF.insert(BB->getIterator(), NewBB);
2347  NewBB->transferSuccessors(PredBB);
2348  PredBB->addSuccessor(NewBB);
2349  PredBB = NewBB;
2350 
2351  // Generate instructions for each appropriate stage. Process instructions
2352  // in original program order.
2353  for (int StageNum = i; StageNum >= 0; --StageNum) {
2354  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2355  BBE = BB->getFirstTerminator();
2356  BBI != BBE; ++BBI) {
2357  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2358  if (BBI->isPHI())
2359  continue;
2360  MachineInstr *NewMI =
2361  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2362  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2363  VRMap);
2364  NewBB->push_back(NewMI);
2365  InstrMap[NewMI] = &*BBI;
2366  }
2367  }
2368  }
2369  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2370  DEBUG({
2371  dbgs() << "prolog:\n";
2372  NewBB->dump();
2373  });
2374  }
2375 
2376  PredBB->replaceSuccessor(BB, KernelBB);
2377 
2378  // Check if we need to remove the branch from the preheader to the original
2379  // loop, and replace it with a branch to the new loop.
2380  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2381  if (numBranches) {
2383  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2384  }
2385 }
2386 
2387 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2388 /// that were started in either the prolog or the kernel. We create a basic
2389 /// block for each stage that needs to complete.
2390 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2391  MachineBasicBlock *KernelBB,
2392  ValueMapTy *VRMap,
2393  MBBVectorTy &EpilogBBs,
2394  MBBVectorTy &PrologBBs) {
2395  // We need to change the branch from the kernel to the first epilog block, so
2396  // this call to analyze branch uses the kernel rather than the original BB.
2397  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2399  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2400  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2401  if (checkBranch)
2402  return;
2403 
2404  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2405  if (*LoopExitI == KernelBB)
2406  ++LoopExitI;
2407  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2408  MachineBasicBlock *LoopExitBB = *LoopExitI;
2409 
2410  MachineBasicBlock *PredBB = KernelBB;
2411  MachineBasicBlock *EpilogStart = LoopExitBB;
2412  InstrMapTy InstrMap;
2413 
2414  // Generate a basic block for each stage, not including the last stage,
2415  // which was generated for the kernel. Each basic block may contain
2416  // instructions from multiple stages/iterations.
2417  int EpilogStage = LastStage + 1;
2418  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2419  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
2420  EpilogBBs.push_back(NewBB);
2421  MF.insert(BB->getIterator(), NewBB);
2422 
2423  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2424  NewBB->addSuccessor(LoopExitBB);
2425 
2426  if (EpilogStart == LoopExitBB)
2427  EpilogStart = NewBB;
2428 
2429  // Add instructions to the epilog depending on the current block.
2430  // Process instructions in original program order.
2431  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2432  for (auto &BBI : *BB) {
2433  if (BBI.isPHI())
2434  continue;
2435  MachineInstr *In = &BBI;
2436  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2437  MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
2438  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2439  NewBB->push_back(NewMI);
2440  InstrMap[NewMI] = In;
2441  }
2442  }
2443  }
2444  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2445  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2446  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2447  InstrMap, LastStage, EpilogStage, i == 1);
2448  PredBB = NewBB;
2449 
2450  DEBUG({
2451  dbgs() << "epilog:\n";
2452  NewBB->dump();
2453  });
2454  }
2455 
2456  // Fix any Phi nodes in the loop exit block.
2457  for (MachineInstr &MI : *LoopExitBB) {
2458  if (!MI.isPHI())
2459  break;
2460  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2461  MachineOperand &MO = MI.getOperand(i);
2462  if (MO.getMBB() == BB)
2463  MO.setMBB(PredBB);
2464  }
2465  }
2466 
2467  // Create a branch to the new epilog from the kernel.
2468  // Remove the original branch and add a new branch to the epilog.
2469  TII->removeBranch(*KernelBB);
2470  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2471  // Add a branch to the loop exit.
2472  if (EpilogBBs.size() > 0) {
2473  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2475  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2476  }
2477 }
2478 
2479 /// Replace all uses of FromReg that appear outside the specified
2480 /// basic block with ToReg.
2481 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2482  MachineBasicBlock *MBB,
2484  LiveIntervals &LIS) {
2485  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2486  E = MRI.use_end();
2487  I != E;) {
2488  MachineOperand &O = *I;
2489  ++I;
2490  if (O.getParent()->getParent() != MBB)
2491  O.setReg(ToReg);
2492  }
2493  if (!LIS.hasInterval(ToReg))
2494  LIS.createEmptyInterval(ToReg);
2495 }
2496 
2497 /// Return true if the register has a use that occurs outside the
2498 /// specified loop.
2499 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2501  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
2502  E = MRI.use_end();
2503  I != E; ++I)
2504  if (I->getParent()->getParent() != BB)
2505  return true;
2506  return false;
2507 }
2508 
2509 /// Generate Phis for the specific block in the generated pipelined code.
2510 /// This function looks at the Phis from the original code to guide the
2511 /// creation of new Phis.
2512 void SwingSchedulerDAG::generateExistingPhis(
2514  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2515  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2516  bool IsLast) {
2517  // Compute the stage number for the initial value of the Phi, which
2518  // comes from the prolog. The prolog to use depends on to which kernel/
2519  // epilog that we're adding the Phi.
2520  unsigned PrologStage = 0;
2521  unsigned PrevStage = 0;
2522  bool InKernel = (LastStageNum == CurStageNum);
2523  if (InKernel) {
2524  PrologStage = LastStageNum - 1;
2525  PrevStage = CurStageNum;
2526  } else {
2527  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2528  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2529  }
2530 
2531  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2532  BBE = BB->getFirstNonPHI();
2533  BBI != BBE; ++BBI) {
2534  unsigned Def = BBI->getOperand(0).getReg();
2535 
2536  unsigned InitVal = 0;
2537  unsigned LoopVal = 0;
2538  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2539 
2540  unsigned PhiOp1 = 0;
2541  // The Phi value from the loop body typically is defined in the loop, but
2542  // not always. So, we need to check if the value is defined in the loop.
2543  unsigned PhiOp2 = LoopVal;
2544  if (VRMap[LastStageNum].count(LoopVal))
2545  PhiOp2 = VRMap[LastStageNum][LoopVal];
2546 
2547  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2548  int LoopValStage =
2549  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2550  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2551  if (NumStages == 0) {
2552  // We don't need to generate a Phi anymore, but we need to rename any uses
2553  // of the Phi value.
2554  unsigned NewReg = VRMap[PrevStage][LoopVal];
2555  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2556  Def, NewReg);
2557  if (VRMap[CurStageNum].count(LoopVal))
2558  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2559  }
2560  // Adjust the number of Phis needed depending on the number of prologs left,
2561  // and the distance from where the Phi is first scheduled.
2562  unsigned NumPhis = NumStages;
2563  if (!InKernel && (int)PrologStage < LoopValStage)
2564  // The NumPhis is the maximum number of new Phis needed during the steady
2565  // state. If the Phi has not been scheduled in current prolog, then we
2566  // need to generate less Phis.
2567  NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
2568  // The number of Phis cannot exceed the number of prolog stages. Each
2569  // stage can potentially define two values.
2570  NumPhis = std::min(NumPhis, PrologStage + 2);
2571 
2572  unsigned NewReg = 0;
2573 
2574  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2575  // In the epilog, we may need to look back one stage to get the correct
2576  // Phi name because the epilog and prolog blocks execute the same stage.
2577  // The correct name is from the previous block only when the Phi has
2578  // been completely scheduled prior to the epilog, and Phi value is not
2579  // needed in multiple stages.
2580  int StageDiff = 0;
2581  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2582  NumPhis == 1)
2583  StageDiff = 1;
2584  // Adjust the computations below when the phi and the loop definition
2585  // are scheduled in different stages.
2586  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2587  StageDiff = StageScheduled - LoopValStage;
2588  for (unsigned np = 0; np < NumPhis; ++np) {
2589  // If the Phi hasn't been scheduled, then use the initial Phi operand
2590  // value. Otherwise, use the scheduled version of the instruction. This
2591  // is a little complicated when a Phi references another Phi.
2592  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2593  PhiOp1 = InitVal;
2594  // Check if the Phi has already been scheduled in a prolog stage.
2595  else if (PrologStage >= AccessStage + StageDiff + np &&
2596  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2597  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2598  // Check if the Phi has already been scheduled, but the loop instruction
2599  // is either another Phi, or doesn't occur in the loop.
2600  else if (PrologStage >= AccessStage + StageDiff + np) {
2601  // If the Phi references another Phi, we need to examine the other
2602  // Phi to get the correct value.
2603  PhiOp1 = LoopVal;
2604  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2605  int Indirects = 1;
2606  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2607  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2608  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2609  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2610  else
2611  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2612  InstOp1 = MRI.getVRegDef(PhiOp1);
2613  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2614  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2615  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2616  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2617  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2618  break;
2619  }
2620  ++Indirects;
2621  }
2622  } else
2623  PhiOp1 = InitVal;
2624  // If this references a generated Phi in the kernel, get the Phi operand
2625  // from the incoming block.
2626  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2627  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2628  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2629 
2630  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2631  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2632  // In the epilog, a map lookup is needed to get the value from the kernel,
2633  // or previous epilog block. How it does this depends on whether the
2634  // instruction is scheduled in the previous block.
2635  if (!InKernel) {
2636  int StageDiffAdj = 0;
2637  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2638  StageDiffAdj = StageScheduled - LoopValStage;
2639  // Use the loop value defined in the kernel, unless the kernel
2640  // contains the last definition of the Phi.
2641  if (np == 0 && PrevStage == LastStageNum &&
2642  (StageScheduled != 0 || LoopValStage != 0) &&
2643  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2644  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2645  // Use the value defined by the Phi. We add one because we switch
2646  // from looking at the loop value to the Phi definition.
2647  else if (np > 0 && PrevStage == LastStageNum &&
2648  VRMap[PrevStage - np + 1].count(Def))
2649  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2650  // Use the loop value defined in the kernel.
2651  else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
2652  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2653  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2654  // Use the value defined by the Phi, unless we're generating the first
2655  // epilog and the Phi refers to a Phi in a different stage.
2656  else if (VRMap[PrevStage - np].count(Def) &&
2657  (!LoopDefIsPhi || PrevStage != LastStageNum))
2658  PhiOp2 = VRMap[PrevStage - np][Def];
2659  }
2660 
2661  // Check if we can reuse an existing Phi. This occurs when a Phi
2662  // references another Phi, and the other Phi is scheduled in an
2663  // earlier stage. We can try to reuse an existing Phi up until the last
2664  // stage of the current Phi.
2665  if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
2666  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2667  int StageDiff = (StageScheduled - LoopValStage);
2668  LVNumStages -= StageDiff;
2669  if (LVNumStages > (int)np) {
2670  NewReg = PhiOp2;
2671  unsigned ReuseStage = CurStageNum;
2672  if (Schedule.isLoopCarried(this, *PhiInst))
2673  ReuseStage -= LVNumStages;
2674  // Check if the Phi to reuse has been generated yet. If not, then
2675  // there is nothing to reuse.
2676  if (VRMap[ReuseStage].count(LoopVal)) {
2677  NewReg = VRMap[ReuseStage][LoopVal];
2678 
2679  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2680  &*BBI, Def, NewReg);
2681  // Update the map with the new Phi name.
2682  VRMap[CurStageNum - np][Def] = NewReg;
2683  PhiOp2 = NewReg;
2684  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2685  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2686 
2687  if (IsLast && np == NumPhis - 1)
2688  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2689  continue;
2690  }
2691  } else if (InKernel && StageDiff > 0 &&
2692  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2693  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2694  }
2695 
2696  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2697  NewReg = MRI.createVirtualRegister(RC);
2698 
2699  MachineInstrBuilder NewPhi =
2700  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2701  TII->get(TargetOpcode::PHI), NewReg);
2702  NewPhi.addReg(PhiOp1).addMBB(BB1);
2703  NewPhi.addReg(PhiOp2).addMBB(BB2);
2704  if (np == 0)
2705  InstrMap[NewPhi] = &*BBI;
2706 
2707  // We define the Phis after creating the new pipelined code, so
2708  // we need to rename the Phi values in scheduled instructions.
2709 
2710  unsigned PrevReg = 0;
2711  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2712  PrevReg = VRMap[PrevStage - np][LoopVal];
2713  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2714  Def, NewReg, PrevReg);
2715  // If the Phi has been scheduled, use the new name for rewriting.
2716  if (VRMap[CurStageNum - np].count(Def)) {
2717  unsigned R = VRMap[CurStageNum - np][Def];
2718  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2719  R, NewReg);
2720  }
2721 
2722  // Check if we need to rename any uses that occurs after the loop. The
2723  // register to replace depends on whether the Phi is scheduled in the
2724  // epilog.
2725  if (IsLast && np == NumPhis - 1)
2726  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2727 
2728  // In the kernel, a dependent Phi uses the value from this Phi.
2729  if (InKernel)
2730  PhiOp2 = NewReg;
2731 
2732  // Update the map with the new Phi name.
2733  VRMap[CurStageNum - np][Def] = NewReg;
2734  }
2735 
2736  while (NumPhis++ < NumStages) {
2737  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2738  &*BBI, Def, NewReg, 0);
2739  }
2740 
2741  // Check if we need to rename a Phi that has been eliminated due to
2742  // scheduling.
2743  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2744  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2745  }
2746 }
2747 
2748 /// Generate Phis for the specified block in the generated pipelined code.
2749 /// These are new Phis needed because the definition is scheduled after the
2750 /// use in the pipelined sequence.
///
/// For each register defined by a non-Phi instruction of BB, up to NumPhis
/// PHI instructions are inserted at the top of NewBB, previously scheduled
/// uses are renamed to the new Phi results, and VRMap is updated with the
/// new names. CurStageNum == LastStageNum selects the kernel handling; any
/// other value takes the epilog path.
2751 void SwingSchedulerDAG::generatePhis(
// NOTE(review): the embedded listing numbering jumps from 2751 to 2753, so a
// parameter line appears to be missing here. NewBB, BB1 and BB2 are used in
// the body without a visible declaration -- presumably they are declared on
// that line; restore it from the upstream source.
2753  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2754  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2755  bool IsLast) {
2756  // Compute the stage number that contains the initial Phi value, and
2757  // the Phi from the previous stage.
2758  unsigned PrologStage = 0;
2759  unsigned PrevStage = 0;
2760  unsigned StageDiff = CurStageNum - LastStageNum;
2761  bool InKernel = (StageDiff == 0);
2762  if (InKernel) {
2763  PrologStage = LastStageNum - 1;
2764  PrevStage = CurStageNum;
2765  } else {
2766  PrologStage = LastStageNum - StageDiff;
2767  PrevStage = LastStageNum + StageDiff - 1;
2768  }
2769 
// Visit every register definition made by a non-Phi instruction of the
// original block.
2770  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2771  BBE = BB->instr_end();
2772  BBI != BBE; ++BBI) {
2773  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2774  MachineOperand &MO = BBI->getOperand(i);
// NOTE(review): the listing numbering jumps from 2775 to 2777; the last
// operand of this condition (after the trailing `||`) is missing.
2775  if (!MO.isReg() || !MO.isDef() ||
2777  continue;
2778 
2779  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2780  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2781  unsigned Def = MO.getReg();
2782  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2783  // An instruction scheduled in stage 0 and is used after the loop
2784  // requires a phi in the epilog for the last definition from either
2785  // the kernel or prolog.
2786  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2787  hasUseAfterLoop(Def, BB, MRI))
2788  NumPhis = 1;
// Outside the kernel, skip definitions scheduled in a stage later than
// PrologStage.
2789  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2790  continue;
2791 
// Second incoming value: the name recorded for PrevStage. If that name is
// defined by a Phi in NewBB, use that Phi's loop operand (getLoopPhiReg).
2792  unsigned PhiOp2 = VRMap[PrevStage][Def];
2793  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2794  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2795  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2796  // The number of Phis can't exceed the number of prolog stages. The
2797  // prolog stage number is zero based.
2798  if (NumPhis > PrologStage + 1 - StageScheduled)
2799  NumPhis = PrologStage + 1 - StageScheduled;
2800  for (unsigned np = 0; np < NumPhis; ++np) {
// First incoming value: the name recorded np stages before PrologStage,
// or the PrologStage entry itself when np is larger than PrologStage.
2801  unsigned PhiOp1 = VRMap[PrologStage][Def];
2802  if (np <= PrologStage)
2803  PhiOp1 = VRMap[PrologStage - np][Def];
// If that name is defined by a Phi in KernelBB or NewBB, use the Phi's
// initial operand (getInitPhiReg) instead.
2804  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2805  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2806  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2807  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2808  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2809  }
2810  if (!InKernel)
2811  PhiOp2 = VRMap[PrevStage - np][Def];
2812 
2813  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2814  unsigned NewReg = MRI.createVirtualRegister(RC);
2815 
// Build NewReg = PHI(PhiOp1 from BB1, PhiOp2 from BB2) ahead of the first
// non-Phi instruction of NewBB.
2816  MachineInstrBuilder NewPhi =
2817  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2818  TII->get(TargetOpcode::PHI), NewReg);
2819  NewPhi.addReg(PhiOp1).addMBB(BB1);
2820  NewPhi.addReg(PhiOp2).addMBB(BB2);
// Only the first generated Phi is associated with the original instruction
// in InstrMap.
2821  if (np == 0)
2822  InstrMap[NewPhi] = &*BBI;
2823 
2824  // Rewrite uses and update the map. The actions depend upon whether
2825  // we are generating code for the kernel or epilog blocks.
2826  if (InKernel) {
2827  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2828  &*BBI, PhiOp1, NewReg);
2829  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2830  &*BBI, PhiOp2, NewReg);
2831 
// The next Phi generated for this definition takes this Phi's result as
// its second incoming value.
2832  PhiOp2 = NewReg;
2833  VRMap[PrevStage - np - 1][Def] = NewReg;
2834  } else {
2835  VRMap[CurStageNum - np][Def] = NewReg;
2836  if (np == NumPhis - 1)
2837  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2838  &*BBI, Def, NewReg);
2839  }
// When IsLast is set, the last Phi generated for Def also replaces the uses
// of Def that occur after the loop.
2840  if (IsLast && np == NumPhis - 1)
2841  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2842  }
2843  }
2844  }
2845 }
2846 
2847 /// Remove instructions that generate values with no uses.
2848 /// Typically, these are induction variable operations that generate values
2849 /// used in the loop itself. A dead instruction has a definition with
2850 /// no uses, or uses that occur in the original loop only.
2851 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2852  MBBVectorTy &EpilogBBs) {
2853  // For each epilog block, check that the value defined by each instruction
2854  // is used. If not, delete it.
2855  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2856  MBE = EpilogBBs.rend();
2857  MBB != MBE; ++MBB)
2858  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2859  ME = (*MBB)->instr_rend();
2860  MI != ME;) {
2861  // From DeadMachineInstructionElem. Don't delete inline assembly.
2862  if (MI->isInlineAsm()) {
2863  ++MI;
2864  continue;
2865  }
2866  bool SawStore = false;
2867  // Check if it's safe to remove the instruction due to side effects.
2868  // We can, and want to, remove Phis here.
2869  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2870  ++MI;
2871  continue;
2872  }
2873  bool used = true;
2874  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2875  MOE = MI->operands_end();
2876  MOI != MOE; ++MOI) {
2877  if (!MOI->isReg() || !MOI->isDef())
2878  continue;
2879  unsigned reg = MOI->getReg();
2880  unsigned realUses = 0;
2881  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2882  EI = MRI.use_end();
2883  UI != EI; ++UI) {
2884  // Check if there are any uses that occur only in the original
2885  // loop. If so, that's not a real use.
2886  if (UI->getParent()->getParent() != BB) {
2887  realUses++;
2888  used = true;
2889  break;
2890  }
2891  }
2892  if (realUses > 0)
2893  break;
2894  used = false;
2895  }
2896  if (!used) {
2897  MI++->eraseFromParent();
2898  continue;
2899  }
2900  ++MI;
2901  }
2902  // In the kernel block, check if we can remove a Phi that generates a value
2903  // used in an instruction removed in the epilog block.
2904  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2905  BBE = KernelBB->getFirstNonPHI();
2906  BBI != BBE;) {
2907  MachineInstr *MI = &*BBI;
2908  ++BBI;
2909  unsigned reg = MI->getOperand(0).getReg();
2910  if (MRI.use_begin(reg) == MRI.use_end()) {
2911  MI->eraseFromParent();
2912  }
2913  }
2914 }
2915 
2916 /// For loop carried definitions, we split the lifetime of a virtual register
2917 /// that has uses past the definition in the next iteration. A copy with a new
2918 /// virtual register is inserted before the definition, which helps with
2919 /// generating a better register assignment.
2920 ///
2921 /// v1 = phi(a, v2) v1 = phi(a, v2)
2922 /// v2 = phi(b, v3) v2 = phi(b, v3)
2923 /// v3 = .. v4 = copy v1
2924 /// .. = V1 v3 = ..
2925 /// .. = v4
2926 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2927  MBBVectorTy &EpilogBBs,
2928  SMSchedule &Schedule) {
2929  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2930  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2931  BBF = KernelBB->getFirstNonPHI();
2932  BBI != BBF; ++BBI) {
2933  unsigned Def = BBI->getOperand(0).getReg();
2934  // Check for any Phi definition that used as an operand of another Phi
2935  // in the same block.
2936  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2937  E = MRI.use_instr_end();
2938  I != E; ++I) {
2939  if (I->isPHI() && I->getParent() == KernelBB) {
2940  // Get the loop carried definition.
2941  unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
2942  if (!LCDef)
2943  continue;
2944  MachineInstr *MI = MRI.getVRegDef(LCDef);
2945  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2946  continue;
2947  // Search through the rest of the block looking for uses of the Phi
2948  // definition. If one occurs, then split the lifetime.
2949  unsigned SplitReg = 0;
2950  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2951  KernelBB->instr_end()))
2952  if (BBJ.readsRegister(Def)) {
2953  // We split the lifetime when we find the first use.
2954  if (SplitReg == 0) {
2955  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2956  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2957  TII->get(TargetOpcode::COPY), SplitReg)
2958  .addReg(Def);
2959  }
2960  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2961  }
2962  if (!SplitReg)
2963  continue;
2964  // Search through each of the epilog blocks for any uses to be renamed.
2965  for (auto &Epilog : EpilogBBs)
2966  for (auto &I : *Epilog)
2967  if (I.readsRegister(Def))
2968  I.substituteRegister(Def, SplitReg, 0, *TRI);
2969  break;
2970  }
2971  }
2972  }
2973 }
2974 
2975 /// Remove the incoming block from the Phis in a basic block.
2976 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2977  for (MachineInstr &MI : *BB) {
2978  if (!MI.isPHI())
2979  break;
2980  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2981  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2982  MI.RemoveOperand(i + 1);
2983  MI.RemoveOperand(i);
2984  break;
2985  }
2986  }
2987 }
2988 
2989 /// Create branches from each prolog basic block to the appropriate epilog
2990 /// block. These edges are needed if the loop ends before reaching the
2991 /// kernel.
///
/// Prologs are visited from the one adjacent to the kernel outwards
/// (PrologBBs[MaxIter] down to PrologBBs[0]) and paired with EpilogBBs[0]
/// up to EpilogBBs[MaxIter]. The trip count returned by
/// TII->reduceLoopCount decides, per pair, whether a conditional branch is
/// emitted, whether the inner blocks are deleted, or whether the prolog
/// simply branches on to the next block.
2992 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2993  MachineBasicBlock *KernelBB,
2994  MBBVectorTy &EpilogBBs,
2995  SMSchedule &Schedule, ValueMapTy *VRMap) {
2996  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2997  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2998  MachineInstr *Cmp = Pass.LI.LoopCompare;
2999  MachineBasicBlock *LastPro = KernelBB;
3000  MachineBasicBlock *LastEpi = KernelBB;
3001 
3002  // Start from the blocks connected to the kernel and work "out"
3003  // to the first prolog and the last epilog blocks.
// NOTE(review): the listing numbering jumps from 3003 to 3005; PrevInsts,
// which is passed to reduceLoopCount below, has no visible declaration and
// was presumably declared on the missing line.
3005  unsigned MaxIter = PrologBBs.size() - 1;
3006  unsigned LC = UINT_MAX;
3007  unsigned LCMin = UINT_MAX;
3008  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3009  // Add branches to the prolog that go to the corresponding
3010  // epilog, and the fall-thru prolog/kernel block.
3011  MachineBasicBlock *Prolog = PrologBBs[j];
3012  MachineBasicBlock *Epilog = EpilogBBs[i];
3013  // We've executed one iteration, so decrement the loop count and check for
3014  // the loop end.
// NOTE(review): the listing numbering jumps from 3014 to 3016; Cond, used
// below, has no visible declaration and was presumably declared on the
// missing line.
3016  // Check if the LOOP0 has already been removed. If so, then there is no need
3017  // to reduce the trip count.
3018  if (LC != 0)
3019  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3020  MaxIter);
3021 
3022  // Record the value of the first trip count, which is used to determine if
3023  // branches and blocks can be removed for constant trip counts.
3024  if (LCMin == UINT_MAX)
3025  LCMin = LC;
3026 
3027  unsigned numAdded = 0;
// NOTE(review): the listing numbering jumps from 3027 to 3029; the `if (...) {`
// that opens this branch chain is missing (the `} else if` below has no
// visible opener). In this first branch the prolog gets a conditional
// branch to its epilog and otherwise continues to LastPro.
3029  Prolog->addSuccessor(Epilog);
3030  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3031  } else if (j >= LCMin) {
// The prolog branches only to its epilog: the edges through LastPro and
// LastEpi are removed and those blocks are erased.
3032  Prolog->addSuccessor(Epilog);
3033  Prolog->removeSuccessor(LastPro);
3034  LastEpi->removeSuccessor(Epilog);
3035  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3036  removePhis(Epilog, LastEpi);
3037  // Remove the blocks that are no longer referenced.
// LastPro and LastEpi are the same block (KernelBB) on the first iteration,
// so it must only be erased once.
3038  if (LastPro != LastEpi) {
3039  LastEpi->clear();
3040  LastEpi->eraseFromParent();
3041  }
3042  LastPro->clear();
3043  LastPro->eraseFromParent();
3044  } else {
// The prolog branches only to LastPro, so the epilog's Phis lose their
// incoming values from this prolog.
3045  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3046  removePhis(Epilog, Prolog);
3047  }
3048  LastPro = Prolog;
3049  LastEpi = Epilog;
// Rename the registers in the numAdded branch instructions just inserted,
// walking backwards from the end of the prolog.
// NOTE(review): the listing numbering jumps from 3049 to 3051; the loop
// header that declares iterator I is missing.
3051  E = Prolog->instr_rend();
3052  I != E && numAdded > 0; ++I, --numAdded)
3053  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3054  }
3055 }
3056 
3057 /// Return true if we can compute the amount the instruction changes
3058 /// during each iteration. Set Delta to the amount of the change.
3059 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3060  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3061  unsigned BaseReg;
3062  int64_t Offset;
3063  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
3064  return false;
3065 
3066  MachineRegisterInfo &MRI = MF.getRegInfo();
3067  // Check if there is a Phi. If so, get the definition in the loop.
3068  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3069  if (BaseDef && BaseDef->isPHI()) {
3070  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3071  BaseDef = MRI.getVRegDef(BaseReg);
3072  }
3073  if (!BaseDef)
3074  return false;
3075 
3076  int D = 0;
3077  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3078  return false;
3079 
3080  Delta = D;
3081  return true;
3082 }
3083 
3084 /// Update the memory operand with a new offset when the pipeliner
3085 /// generates a new copy of the instruction that refers to a
3086 /// different memory location.
3087 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3088  MachineInstr &OldMI, unsigned Num) {
3089  if (Num == 0)
3090  return;
3091  // If the instruction has memory operands, then adjust the offset
3092  // when the instruction appears in different stages.
3093  unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
3094  if (NumRefs == 0)
3095  return;
3096  MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
3097  unsigned Refs = 0;
3098  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3099  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3100  (!MMO->getValue())) {
3101  NewMemRefs[Refs++] = MMO;
3102  continue;
3103  }
3104  unsigned Delta;
3105  if (computeDelta(OldMI, Delta)) {
3106  int64_t AdjOffset = Delta * Num;
3107  NewMemRefs[Refs++] =
3108  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
3109  } else
3110  NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
3111  }
3112  NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
3113 }
3114 
3115 /// Clone the instruction for the new pipelined loop and update the
3116 /// memory operands, if needed.
3117 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3118  unsigned CurStageNum,
3119  unsigned InstStageNum) {
3120  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3121  // Check for tied operands in inline asm instructions. This should be handled
3122  // elsewhere, but I'm not sure of the best solution.
3123  if (OldMI->isInlineAsm())
3124  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3125  const auto &MO = OldMI->getOperand(i);
3126  if (MO.isReg() && MO.isUse())
3127  break;
3128  unsigned UseIdx;
3129  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3130  NewMI->tieOperands(i, UseIdx);
3131  }
3132  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3133  return NewMI;
3134 }
3135 
3136 /// Clone the instruction for the new pipelined loop. If needed, this
3137 /// function updates the instruction using the values saved in the
3138 /// InstrChanges structure.
///
/// Returns the clone, or nullptr when the instruction has an InstrChanges
/// entry but the target cannot report its base/offset operand positions.
3139 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3140  unsigned CurStageNum,
3141  unsigned InstStageNum,
3142  SMSchedule &Schedule) {
3143  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
// NOTE(review): the listing numbering jumps from 3143 to 3145; the
// declaration of iterator It (the left-hand side of this lookup) is
// missing.
3145  InstrChanges.find(getSUnit(OldMI));
3146  if (It != InstrChanges.end()) {
3147  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3148  unsigned BasePos, OffsetPos;
3149  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3150  return nullptr;
3151  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3152  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
// Only when the loop definition of the recorded register is scheduled in a
// later stage than this instruction is the immediate advanced by the
// recorded offset for every stage of distance.
3153  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3154  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3155  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3156  }
3157  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3158  return NewMI;
3159 }
3160 
3161 /// Update the machine instruction with new virtual registers. This
3162 /// function may change the definitions and/or uses.
///
/// Each rewritten definition gets a fresh virtual register that is recorded
/// in VRMap[CurStageNum]; each use is replaced by the name recorded in VRMap
/// for the stage holding the last definition, if such a name exists. When
/// LastDef is set, uses of the old register after the loop are renamed too.
3163 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3164  unsigned CurStageNum,
3165  unsigned InstrStageNum,
3166  SMSchedule &Schedule,
3167  ValueMapTy *VRMap) {
3168  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3169  MachineOperand &MO = NewMI->getOperand(i);
// NOTE(review): the listing numbering jumps from 3169 to 3171; the condition
// guarding this `continue` is missing. Presumably it skips operands that
// are not (virtual) registers -- restore it from the upstream source.
3171  continue;
3172  unsigned reg = MO.getReg();
3173  if (MO.isDef()) {
3174  // Create a new virtual register for the definition.
3175  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3176  unsigned NewReg = MRI.createVirtualRegister(RC);
3177  MO.setReg(NewReg);
3178  VRMap[CurStageNum][reg] = NewReg;
3179  if (LastDef)
3180  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3181  } else if (MO.isUse()) {
3182  MachineInstr *Def = MRI.getVRegDef(reg);
3183  // Compute the stage that contains the last definition for instruction.
3184  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3185  unsigned StageNum = CurStageNum;
3186  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3187  // Compute the difference in stages between the definition and the use.
3188  unsigned StageDiff = (InstrStageNum - DefStageNum);
3189  // Make an adjustment to get the last definition.
3190  StageNum -= StageDiff;
3191  }
// Leave the operand unchanged when no new name has been recorded for that
// stage.
3192  if (VRMap[StageNum].count(reg))
3193  MO.setReg(VRMap[StageNum][reg]);
3194  }
3195  }
3196 }
3197 
3198 /// Return the instruction in the loop that defines the register.
3199 /// If the definition is a Phi, then follow the Phi operand to
3200 /// the instruction in the loop.
///
/// The walk stops at the first non-Phi definition, or as soon as a Phi is
/// reached a second time (in which case that Phi is returned).
3201 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
// NOTE(review): the listing numbering jumps from 3201 to 3203; Visited, used
// below to detect Phi cycles, has no visible declaration and was
// presumably declared on the missing line.
3203  MachineInstr *Def = MRI.getVRegDef(Reg);
// NOTE(review): Def is dereferenced without a null check -- this assumes
// every register reached here has a definition; confirm against callers.
3204  while (Def->isPHI()) {
// Stop if this Phi was already seen, so a cycle of Phis cannot loop forever.
3205  if (!Visited.insert(Def).second)
3206  break;
// Follow the incoming value whose predecessor block is the loop block BB.
3207  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3208  if (Def->getOperand(i + 1).getMBB() == BB) {
3209  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3210  break;
3211  }
3212  }
3213  return Def;
3214 }
3215 
3216 /// Return the new name for the value from the previous stage.
3217 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3218  unsigned LoopVal, unsigned LoopStage,
3219  ValueMapTy *VRMap,
3220  MachineBasicBlock *BB) {
3221  unsigned PrevVal = 0;
3222  if (StageNum > PhiStage) {
3223  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3224  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3225  // The name is defined in the previous stage.
3226  PrevVal = VRMap[StageNum - 1][LoopVal];
3227  else if (VRMap[StageNum].count(LoopVal))
3228  // The previous name is defined in the current stage when the instruction
3229  // order is swapped.
3230  PrevVal = VRMap[StageNum][LoopVal];
3231  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3232  // The loop value hasn't yet been scheduled.
3233  PrevVal = LoopVal;
3234  else if (StageNum == PhiStage + 1)
3235  // The loop value is another phi, which has not been scheduled.
3236  PrevVal = getInitPhiReg(*LoopInst, BB);
3237  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3238  // The loop value is another phi, which has been scheduled.
3239  PrevVal =
3240  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3241  LoopStage, VRMap, BB);
3242  }
3243  return PrevVal;
3244 }
3245 
3246 /// Rewrite the Phi values in the specified block to use the mappings
3247 /// from the initial operand. Once the Phi is scheduled, we switch
3248 /// to using the loop value instead of the Phi value, so those names
3249 /// do not need to be rewritten.
3250 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3251  unsigned StageNum,
3252  SMSchedule &Schedule,
3253  ValueMapTy *VRMap,
3254  InstrMapTy &InstrMap) {
3255  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
3256  BBE = BB->getFirstNonPHI();
3257  BBI != BBE; ++BBI) {
3258  unsigned InitVal = 0;
3259  unsigned LoopVal = 0;
3260  getPhiRegs(*BBI, BB, InitVal, LoopVal);
3261  unsigned PhiDef = BBI->getOperand(0).getReg();
3262 
3263  unsigned PhiStage =
3264  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3265  unsigned LoopStage =
3266  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3267  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3268  if (NumPhis > StageNum)
3269  NumPhis = StageNum;
3270  for (unsigned np = 0; np <= NumPhis; ++np) {
3271  unsigned NewVal =
3272  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3273  if (!NewVal)
3274  NewVal = InitVal;
3275  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
3276  PhiDef, NewVal);
3277  }
3278  }
3279 }
3280 
3281 /// Rewrite a previously scheduled instruction to use the register value
3282 /// from the new instruction. Make sure the instruction occurs in the
3283 /// basic block, and we don't change the uses in the new instruction.
3284 void SwingSchedulerDAG::rewriteScheduledInstr(
3285  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3286  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3287  unsigned NewReg, unsigned PrevReg) {
3288  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3289  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3290  // Rewrite uses that have been scheduled already to use the new
3291  // Phi register.
3292  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3293  EI = MRI.use_end();
3294  UI != EI;) {
3295  MachineOperand &UseOp = *UI;
3296  MachineInstr *UseMI = UseOp.getParent();
3297  ++UI;
3298  if (UseMI->getParent() != BB)
3299  continue;
3300  if (UseMI->isPHI()) {
3301  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3302  continue;
3303  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3304  continue;
3305  }
3306  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3307  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3308  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3309  int StageSched = Schedule.stageScheduled(OrigMISU);
3310  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3311  unsigned ReplaceReg = 0;
3312  // This is the stage for the scheduled instruction.
3313  if (StagePhi == StageSched && Phi->isPHI()) {
3314  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3315  if (PrevReg && InProlog)
3316  ReplaceReg = PrevReg;
3317  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3318  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3319  ReplaceReg = PrevReg;
3320  else
3321  ReplaceReg = NewReg;
3322  }
3323  // The scheduled instruction occurs before the scheduled Phi, and the
3324  // Phi is not loop carried.
3325  if (!InProlog && StagePhi + 1 == StageSched &&
3326  !Schedule.isLoopCarried(this, *Phi))
3327  ReplaceReg = NewReg;
3328  if (StagePhi > StageSched && Phi->isPHI())
3329  ReplaceReg = NewReg;
3330  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3331  ReplaceReg = NewReg;
3332  if (ReplaceReg) {
3333  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3334  UseOp.setReg(ReplaceReg);
3335  }
3336  }
3337 }
3338 
3339 /// Check if we can change the instruction to use an offset value from the
3340 /// previous iteration. If so, return true and set the base and offset values
3341 /// so that we can rewrite the load, if necessary.
3342 /// v1 = Phi(v0, v3)
3343 /// v2 = load v1, 0
3344 /// v3 = post_store v1, 4, x
3345 /// This function enables the load to be rewritten as v2 = load v3, 4.
///
/// The output parameters are written only when the function returns true:
/// BasePos/OffsetPos are the operand positions in MI, NewBase is the
/// register defined in the loop by the post-increment instruction, and
/// Offset is that instruction's immediate.
3346 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3347  unsigned &BasePos,
3348  unsigned &OffsetPos,
3349  unsigned &NewBase,
3350  int64_t &Offset) {
3351  // Get the load instruction.
// A post-increment instruction is itself never rewritten.
3352  if (TII->isPostIncrement(*MI))
3353  return false;
3354  unsigned BasePosLd, OffsetPosLd;
3355  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3356  return false;
3357  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3358 
3359  // Look for the Phi instruction.
// NOTE(review): the listing numbering jumps from 3359 to 3361, so a line
// appears to be missing here -- possibly a local declaration; verify
// against the upstream source.
3361  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3362  if (!Phi || !Phi->isPHI())
3363  return false;
3364  // Get the register defined in the loop block.
3365  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3366  if (!PrevReg)
3367  return false;
3368 
3369  // Check for the post-increment load/store instruction.
3370  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3371  if (!PrevDef || PrevDef == MI)
3372  return false;
3373 
3374  if (!TII->isPostIncrement(*PrevDef))
3375  return false;
3376 
3377  unsigned BasePos1 = 0, OffsetPos1 = 0;
3378  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3379  return false;
3380 
3381  // Make sure offset values are both positive or both negative.
// Zero counts as non-negative in this comparison.
3382  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3383  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3384  if ((LoadOffset >= 0) != (StoreOffset >= 0))
3385  return false;
3386 
3387  // Set the return value once we determine that we return true.
3388  BasePos = BasePosLd;
3389  OffsetPos = OffsetPosLd;
3390  NewBase = PrevReg;
3391  Offset = StoreOffset;
3392  return true;
3393 }
3394 
3395 /// Apply changes to the instruction if needed. The changes are needed
3396 /// to improve the scheduling and depend upon the final schedule.
///
/// Returns a modified clone of MI when MI has an InstrChanges entry and is
/// scheduled in an earlier stage than the loop definition of its base
/// register; returns nullptr in every other case. The clone is recorded in
/// NewMIs and, when UpdateDAG is set, also replaces MI in its SUnit.
3397 MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3398  SMSchedule &Schedule,
3399  bool UpdateDAG) {
3400  SUnit *SU = getSUnit(MI);
// NOTE(review): the listing numbering jumps from 3400 to 3402; the
// declaration of iterator It (the left-hand side of this lookup) is
// missing.
3402  InstrChanges.find(SU);
3403  if (It != InstrChanges.end()) {
3404  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3405  unsigned BasePos, OffsetPos;
3406  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3407  return nullptr;
3408  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3409  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3410  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3411  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3412  int BaseStageNum = Schedule.stageScheduled(SU);
3413  int BaseCycleNum = Schedule.cycleScheduled(SU);
3414  if (BaseStageNum < DefStageNum) {
3415  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3416  int OffsetDiff = DefStageNum - BaseStageNum;
// When the definition is scheduled in an earlier cycle than MI, switch the
// base to the recorded register and apply the recorded offset one time
// fewer.
3417  if (DefCycleNum < BaseCycleNum) {
3418  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3419  if (OffsetDiff > 0)
3420  --OffsetDiff;
3421  }
3422  int64_t NewOffset =
3423  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3424  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3425  if (UpdateDAG) {
3426  SU->setInstr(NewMI);
3427  MISUnitMap[NewMI] = SU;
3428  }
3429  NewMIs.insert(NewMI);
3430  return NewMI;
3431  }
3432  }
3433  return nullptr;
3434 }
3435 
3436 /// Return true for an order dependence that is potentially loop carried.
3437 /// An order dependence is loop carried if the destination defines a value
3438 /// that may be used by the source in a subsequent iteration.
///
/// The answer is conservative: whenever the accesses cannot be analyzed
/// (unknown increment, unknown base/offset, different base registers), the
/// dependence is reported as loop carried.
3439 bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
3440  bool isSucc) {
3441  if (!isOrder(Source, Dep) || Dep.isArtificial())
3442  return false;
3443 
// With pruning disabled, every non-artificial order dependence is treated
// as loop carried.
3444  if (!SwpPruneLoopCarried)
3445  return true;
3446 
3447  MachineInstr *SI = Source->getInstr();
3448  MachineInstr *DI = Dep.getSUnit()->getInstr();
// For a predecessor edge the roles of the two instructions are exchanged.
3449  if (!isSucc)
3450  std::swap(SI, DI);
3451  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3452 
3453  // Assume ordered loads and stores may have a loop carried dependence.
// NOTE(review): the listing numbering jumps from 3454 to 3456; the remaining
// operands of this condition (after the trailing `||`) are missing.
3454  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3456  return true;
3457 
3458  // Only chain dependences between a load and store can be loop carried.
3459  if (!DI->mayStore() || !SI->mayLoad())
3460  return false;
3461 
3462  unsigned DeltaS, DeltaD;
3463  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3464  return true;
3465 
3466  unsigned BaseRegS, BaseRegD;
3467  int64_t OffsetS, OffsetD;
3468  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3469  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
3470  !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
3471  return true;
3472 
3473  if (BaseRegS != BaseRegD)
3474  return true;
3475 
// NOTE(review): the first memory operand of each instruction is dereferenced
// without checking that one exists -- confirm both always carry one here.
3476  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3477  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3478 
3479  // This is the main test, which checks the offset values and the loop
3480  // increment value to determine if the accesses may be loop carried.
3481  if (OffsetS >= OffsetD)
3482  return OffsetS + AccessSizeS > DeltaS;
3483  else if (OffsetS < OffsetD)
3484  return OffsetD + AccessSizeD > DeltaD;
3485 
// Not reachable: the two comparisons above cover every pair of offsets.
3486  return true;
3487 }
3488 
3489 void SwingSchedulerDAG::postprocessDAG() {
3490  for (auto &M : Mutations)
3491  M->apply(this);
3492 }
3493 
3494 /// Try to schedule the node at the specified StartCycle and continue
3495 /// until the node is schedule or the EndCycle is reached. This function
3496 /// returns true if the node is scheduled. This routine may search either
3497 /// forward or backward for a place to insert the instruction based upon
3498 /// the relative values of StartCycle and EndCycle.
3499 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3500  bool forward = true;
3501  if (StartCycle > EndCycle)
3502  forward = false;
3503 
3504  // The terminating condition depends on the direction.
3505  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3506  for (int curCycle = StartCycle; curCycle != termCycle;
3507  forward ? ++curCycle : --curCycle) {
3508 
3509  // Add the already scheduled instructions at the specified cycle to the DFA.
3510  Resources->clearResources();
3511  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3512  checkCycle <= LastCycle; checkCycle += II) {
3513  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3514 
3515  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3516  E = cycleInstrs.end();
3517  I != E; ++I) {
3518  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3519  continue;
3520  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3521  "These instructions have already been scheduled.");
3522  Resources->reserveResources(*(*I)->getInstr());
3523  }
3524  }
3525  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3526  Resources->canReserveResources(*SU->getInstr())) {
3527  DEBUG({
3528  dbgs() << "\tinsert at cycle " << curCycle << " ";
3529  SU->getInstr()->dump();
3530  });
3531 
3532  ScheduledInstrs[curCycle].push_back(SU);
3533  InstrToCycle.insert(std::make_pair(SU, curCycle));
3534  if (curCycle > LastCycle)
3535  LastCycle = curCycle;
3536  if (curCycle < FirstCycle)
3537  FirstCycle = curCycle;
3538  return true;
3539  }
3540  DEBUG({
3541  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3542  SU->getInstr()->dump();
3543  });
3544  }
3545  return false;
3546 }
3547 
3548 // Return the cycle of the earliest scheduled instruction in the chain.
3549 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3550  SmallPtrSet<SUnit *, 8> Visited;
3551  SmallVector<SDep, 8> Worklist;
3552  Worklist.push_back(Dep);
3553  int EarlyCycle = INT_MAX;
3554  while (!Worklist.empty()) {
3555  const SDep &Cur = Worklist.pop_back_val();
3556  SUnit *PrevSU = Cur.getSUnit();
3557  if (Visited.count(PrevSU))
3558  continue;
3559  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3560  if (it == InstrToCycle.end())
3561  continue;
3562  EarlyCycle = std::min(EarlyCycle, it->second);
3563  for (const auto &PI : PrevSU->Preds)
3564  if (SwingSchedulerDAG::isOrder(PrevSU, PI))
3565  Worklist.push_back(PI);
3566  Visited.insert(PrevSU);
3567  }
3568  return EarlyCycle;
3569 }
3570 
3571 // Return the cycle of the latest scheduled instruction in the chain.
3572 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3573  SmallPtrSet<SUnit *, 8> Visited;
3574  SmallVector<SDep, 8> Worklist;
3575  Worklist.push_back(Dep);
3576  int LateCycle = INT_MIN;
3577  while (!Worklist.empty()) {
3578  const SDep &Cur = Worklist.pop_back_val();
3579  SUnit *SuccSU = Cur.getSUnit();
3580  if (Visited.count(SuccSU))
3581  continue;
3582  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3583  if (it == InstrToCycle.end())
3584  continue;
3585  LateCycle = std::max(LateCycle, it->second);
3586  for (const auto &SI : SuccSU->Succs)
3587  if (SwingSchedulerDAG::isOrder(SuccSU, SI))
3588  Worklist.push_back(SI);
3589  Visited.insert(SuccSU);
3590  }
3591  return LateCycle;
3592 }
3593 
3594 /// If an instruction has a use that spans multiple iterations, then
3595 /// return true. These instructions are characterized by having a back-ege
3596 /// to a Phi, which contains a reference to another Phi.
3597 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3598  for (auto &P : SU->Preds)
3599  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3600  for (auto &S : P.getSUnit()->Succs)
3601  if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
3602  return P.getSUnit();
3603  return nullptr;
3604 }
3605 
3606 /// Compute the scheduling start slot for the instruction. The start slot
3607 /// depends on any predecessor or successor nodes scheduled already.
3608 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3609  int *MinEnd, int *MaxStart, int II,
3610  SwingSchedulerDAG *DAG) {
3611  // Iterate over each instruction that has been scheduled already. The start
3612  // slot computuation depends on whether the previously scheduled instruction
3613  // is a predecessor or successor of the specified instruction.
3614  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3615 
3616  // Iterate over each instruction in the current cycle.
3617  for (SUnit *I : getInstructions(cycle)) {
3618  // Because we're processing a DAG for the dependences, we recognize
3619  // the back-edge in recurrences by anti dependences.
3620  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3621  const SDep &Dep = SU->Preds[i];
3622  if (Dep.getSUnit() == I) {
3623  if (!DAG->isBackedge(SU, Dep)) {
3624  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3625  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3626  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3627  if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
3628  int End = earliestCycleInChain(Dep) + (II - 1);
3629  *MinEnd = std::min(*MinEnd, End);
3630  }
3631  } else {
3632  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3633  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3634  *MinLateStart = std::min(*MinLateStart, LateStart);
3635  }
3636  }
3637  // For instruction that requires multiple iterations, make sure that
3638  // the dependent instruction is not scheduled past the definition.
3639  SUnit *BE = multipleIterations(I, DAG);
3640  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3641  !SU->isPred(I))
3642  *MinLateStart = std::min(*MinLateStart, cycle);
3643  }
3644  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
3645  if (SU->Succs[i].getSUnit() == I) {
3646  const SDep &Dep = SU->Succs[i];
3647  if (!DAG->isBackedge(SU, Dep)) {
3648  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3649  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3650  *MinLateStart = std::min(*MinLateStart, LateStart);
3651  if (DAG->isLoopCarriedOrder(SU, Dep)) {
3652  int Start = latestCycleInChain(Dep) + 1 - II;
3653  *MaxStart = std::max(*MaxStart, Start);
3654  }
3655  } else {
3656  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3657  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3658  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3659  }
3660  }
3661  }
3662  }
3663 }
3664 
3665 /// Order the instructions within a cycle so that the definitions occur
3666 /// before the uses. Returns true if the instruction is added to the start
3667 /// of the list, or false if added to the end.
3668 bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3669  std::deque<SUnit *> &Insts) {
3670  MachineInstr *MI = SU->getInstr();
3671  bool OrderBeforeUse = false;
3672  bool OrderAfterDef = false;
3673  bool OrderBeforeDef = false;
3674  unsigned MoveDef = 0;
3675  unsigned MoveUse = 0;
3676  int StageInst1 = stageScheduled(SU);
3677 
3678  unsigned Pos = 0;
3679  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3680  ++I, ++Pos) {
3681  // Relative order of Phis does not matter.
3682  if (MI->isPHI() && (*I)->getInstr()->isPHI())
3683  continue;
3684  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3685  MachineOperand &MO = MI->getOperand(i);
3687  continue;
3688  unsigned Reg = MO.getReg();
3689  unsigned BasePos, OffsetPos;
3690  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3691  if (MI->getOperand(BasePos).getReg() == Reg)
3692  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3693  Reg = NewReg;
3694  bool Reads, Writes;
3695  std::tie(Reads, Writes) =
3696  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3697  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3698  OrderBeforeUse = true;
3699  MoveUse = Pos;
3700  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3701  // Add the instruction after the scheduled instruction.
3702  OrderAfterDef = true;
3703  MoveDef = Pos;
3704  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3705  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3706  OrderBeforeUse = true;
3707  MoveUse = Pos;
3708  } else {
3709  OrderAfterDef = true;
3710  MoveDef = Pos;
3711  }
3712  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3713  OrderBeforeUse = true;
3714  MoveUse = Pos;
3715  if (MoveUse != 0) {
3716  OrderAfterDef = true;
3717  MoveDef = Pos - 1;
3718  }
3719  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3720  // Add the instruction before the scheduled instruction.
3721  OrderBeforeUse = true;
3722  MoveUse = Pos;
3723  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3724  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3725  OrderBeforeDef = true;
3726  MoveUse = Pos;
3727  }
3728  }
3729  // Check for order dependences between instructions. Make sure the source
3730  // is ordered before the destination.
3731  for (auto &S : SU->Succs)
3732  if (S.getKind() == SDep::Order) {
3733  if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3734  OrderBeforeUse = true;
3735  MoveUse = Pos;
3736  }
3737  } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
3738  if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
3739  if (S.isAssignedRegDep()) {
3740  OrderAfterDef = true;
3741  MoveDef = Pos;
3742  }
3743  } else {
3744  OrderBeforeUse = true;
3745  MoveUse = Pos;
3746  }
3747  }
3748  for (auto &P : SU->Preds)
3749  if (P.getKind() == SDep::Order) {
3750  if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3751  OrderAfterDef = true;
3752  MoveDef = Pos;
3753  }
3754  } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
3755  if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
3756  if (P.isAssignedRegDep()) {
3757  OrderBeforeUse = true;
3758  MoveUse = Pos;
3759  }
3760  } else {
3761  OrderAfterDef = true;
3762  MoveDef = Pos;
3763  }
3764  }
3765  }
3766 
3767  // A circular dependence.
3768  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3769  OrderBeforeUse = false;
3770 
3771  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3772  // to a loop-carried dependence.
3773  if (OrderBeforeDef)
3774  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3775 
3776  // The uncommon case when the instruction order needs to be updated because
3777  // there is both a use and def.
3778  if (OrderBeforeUse && OrderAfterDef) {
3779  SUnit *UseSU = Insts.at(MoveUse);
3780  SUnit *DefSU = Insts.at(MoveDef);
3781  if (MoveUse > MoveDef) {
3782  Insts.erase(Insts.begin() + MoveUse);
3783  Insts.erase(Insts.begin() + MoveDef);
3784  } else {
3785  Insts.erase(Insts.begin() + MoveDef);
3786  Insts.erase(Insts.begin() + MoveUse);
3787  }
3788  if (orderDependence(SSD, UseSU, Insts)) {
3789  Insts.push_front(SU);
3790  orderDependence(SSD, DefSU, Insts);
3791  return true;
3792  }
3793  Insts.pop_back();
3794  Insts.push_back(SU);
3795  Insts.push_back(UseSU);
3796  orderDependence(SSD, DefSU, Insts);
3797  return false;
3798  }
3799  // Put the new instruction first if there is a use in the list. Otherwise,
3800  // put it at the end of the list.
3801  if (OrderBeforeUse)
3802  Insts.push_front(SU);
3803  else
3804  Insts.push_back(SU);
3805  return OrderBeforeUse;
3806 }
3807 
3808 /// Return true if the scheduled Phi has a loop carried operand.
3809 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3810  if (!Phi.isPHI())
3811  return false;
3812  assert(Phi.isPHI() && "Expecing a Phi.");
3813  SUnit *DefSU = SSD->getSUnit(&Phi);
3814  unsigned DefCycle = cycleScheduled(DefSU);
3815  int DefStage = stageScheduled(DefSU);
3816 
3817  unsigned InitVal = 0;
3818  unsigned LoopVal = 0;
3819  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3820  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3821  if (!UseSU)
3822  return true;
3823  if (UseSU->getInstr()->isPHI())
3824  return true;
3825  unsigned LoopCycle = cycleScheduled(UseSU);
3826  int LoopStage = stageScheduled(UseSU);
3827  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3828 }
3829 
3830 /// Return true if the instruction is a definition that is loop carried
3831 /// and defines the use on the next iteration.
3832 /// v1 = phi(v2, v3)
3833 /// (Def) v3 = op v1
3834 /// (MO) = v1
3835 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3836 /// register.
3837 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3838  MachineInstr *Def, MachineOperand &MO) {
3839  if (!MO.isReg())
3840  return false;
3841  if (Def->isPHI())
3842  return false;
3843  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3844  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3845  return false;
3846  if (!isLoopCarried(SSD, *Phi))
3847  return false;
3848  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3849  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3850  MachineOperand &DMO = Def->getOperand(i);
3851  if (!DMO.isReg() || !DMO.isDef())
3852  continue;
3853  if (DMO.getReg() == LoopReg)
3854  return true;
3855  }
3856  return false;
3857 }
3858 
3859 // Check if the generated schedule is valid. This function checks if
3860 // an instruction that uses a physical register is scheduled in a
3861 // different stage than the definition. The pipeliner does not handle
3862 // physical register values that may cross a basic block boundary.
3863 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3864  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3865  SUnit &SU = SSD->SUnits[i];
3866  if (!SU.hasPhysRegDefs)
3867  continue;
3868  int StageDef = stageScheduled(&SU);
3869  assert(StageDef != -1 && "Instruction should have been scheduled.");
3870  for (auto &SI : SU.Succs)
3871  if (SI.isAssignedRegDep())
3872  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3873  if (stageScheduled(SI.getSUnit()) != StageDef)
3874  return false;
3875  }
3876  return true;
3877 }
3878 
3879 /// After the schedule has been formed, call this function to combine
3880 /// the instructions from the different stages/cycles. That is, this
3881 /// function creates a schedule that represents a single iteration.
3882 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3883  // Move all instructions to the first stage from later stages.
3884  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3885  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3886  ++stage) {
3887  std::deque<SUnit *> &cycleInstrs =
3888  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3889  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3890  E = cycleInstrs.rend();
3891  I != E; ++I)
3892  ScheduledInstrs[cycle].push_front(*I);
3893  }
3894  }
3895  // Iterate over the definitions in each instruction, and compute the
3896  // stage difference for each use. Keep the maximum value.
3897  for (auto &I : InstrToCycle) {
3898  int DefStage = stageScheduled(I.first);
3899  MachineInstr *MI = I.first->getInstr();
3900  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3901  MachineOperand &Op = MI->getOperand(i);
3902  if (!Op.isReg() || !Op.isDef())
3903  continue;
3904 
3905  unsigned Reg = Op.getReg();
3906  unsigned MaxDiff = 0;
3907  bool PhiIsSwapped = false;
3908  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3909  EI = MRI.use_end();
3910  UI != EI; ++UI) {
3911  MachineOperand &UseOp = *UI;
3912  MachineInstr *UseMI = UseOp.getParent();
3913  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3914  int UseStage = stageScheduled(SUnitUse);
3915  unsigned Diff = 0;
3916  if (UseStage != -1 && UseStage >= DefStage)
3917  Diff = UseStage - DefStage;
3918  if (MI->isPHI()) {
3919  if (isLoopCarried(SSD, *MI))
3920  ++Diff;
3921  else
3922  PhiIsSwapped = true;
3923  }
3924  MaxDiff = std::max(Diff, MaxDiff);
3925  }
3926  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3927  }
3928  }
3929 
3930  // Erase all the elements in the later stages. Only one iteration should
3931  // remain in the scheduled list, and it contains all the instructions.
3932  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3933  ScheduledInstrs.erase(cycle);
3934 
3935  // Change the registers in instruction as specified in the InstrChanges
3936  // map. We need to use the new registers to create the correct order.
3937  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3938  SUnit *SU = &SSD->SUnits[i];
3939  SSD->applyInstrChange(SU->getInstr(), *this, true);
3940  }
3941 
3942  // Reorder the instructions in each cycle to fix and improve the
3943  // generated code.
3944  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3945  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3946  std::deque<SUnit *> newOrderZC;
3947  // Put the zero-cost, pseudo instructions at the start of the cycle.
3948  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3949  SUnit *SU = cycleInstrs[i];
3950  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3951  orderDependence(SSD, SU, newOrderZC);
3952  }
3953  std::deque<SUnit *> newOrderI;
3954  // Then, add the regular instructions back.
3955  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3956  SUnit *SU = cycleInstrs[i];
3957  if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3958  orderDependence(SSD, SU, newOrderI);
3959  }
3960  // Replace the old order with the new order.
3961  cycleInstrs.swap(newOrderZC);
3962  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3963  }
3964 
3965  DEBUG(dump(););
3966 }
3967 
3968 /// Print the schedule information to the given output.
3969 void SMSchedule::print(raw_ostream &os) const {
3970  // Iterate over each cycle.
3971  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3972  // Iterate over each instruction in the cycle.
3973  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3974  for (SUnit *CI : cycleInstrs->second) {
3975  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3976  os << "(" << CI->NodeNum << ") ";
3977  CI->getInstr()->print(os);
3978  os << "\n";
3979  }
3980  }
3981 }
3982 
3983 /// Utility function used for debugging to print the schedule.
3984 void SMSchedule::dump() const { print(dbgs()); }
bool canReserveResources(const llvm::MCInstrDesc *MID)
MachineLoop * L
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
void print(raw_ostream &OS, bool SkipOpers=false, const TargetInstrInfo *TII=nullptr) const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:763
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
finishBlock - Clean up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:296
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:72
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
instr_iterator instr_end()
STATISTIC(NumFunctions,"Total number of functions")
size_t i
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
MachineBasicBlock * getMBB() const
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:776
aarch64 AArch64 CCMP Pass
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:605
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:389
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:380
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:270
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:32
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
A debug info location.
Definition: DebugLoc.h:34
The two locations do not alias at all.
Definition: AliasAnalysis.h:79
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
void removePred(const SDep &D)
removePred - This removes the specified edge as a pred of the current node if it exists.
Modulo Software false
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value * > &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
vector_type::const_iterator const_iterator
Definition: SetVector.h:50
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:365
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:301
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
bool isArtificial() const
isArtificial - Test if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:199
BlockT * getHeader() const
Definition: LoopInfo.h:102
SmallVector< SDep, 4 > Preds
Definition: ScheduleDAG.h:258
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:50
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
unsigned getHeight() const
getHeight - Return the height of this node, which is the length of the maximum path down to any node ...
Definition: ScheduleDAG.h:425
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:509
bool isPHI() const
Definition: MachineInstr.h:786
void setInstr(MachineInstr *MI)
setInstr - Assign the instruction for the SUnit.
Definition: ScheduleDAG.h:382
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
unsigned getPSet() const
Modulo Software Pipelining
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:49
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:592
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
std::vector< MachineBasicBlock * >::iterator succ_iterator
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Definition: ScheduleDAG.h:282
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
#define F(x, y, z)
Definition: MD5.cpp:51
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:136
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
MachineBasicBlock * MBB
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
bool isPred(SUnit *N)
isPred - Test if node N is a predecessor of this node.
Definition: ScheduleDAG.h:452
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:791
int64_t getImm() const
void reserveResources(const llvm::MCInstrDesc *MID)
iterator find(const KeyT &Key)
Definition: MapVector.h:131
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
reverse_iterator rbegin()
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value * > &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
TargetInstrInfo - Interface to description of machine instruction set.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:359
SDep - Scheduling dependency.
Definition: ScheduleDAG.h:45
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const MachineRegisterInfo * MRI
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
bool getIncrementValue(const MachineInstr &MI, int &Value) const override
If the instruction is an increment of a constant value, return the amount.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Definition: ScheduleDAG.h:275
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
MachineInstrBuilder & UseMI
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:73
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:260
unsigned getLatency() const
getLatency - Return the latency value for this edge, which roughly means the minimum number of cycles...
Definition: ScheduleDAG.h:139
uint32_t Offset
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:373
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
iterator begin() const
Definition: SmallPtrSet.h:398
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:98
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form...
INITIALIZE_PASS_BEGIN(MachinePipeliner,"pipeliner","Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:52
static cl::opt< unsigned > MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations"))
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:65
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:64
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr * > &PrevInsts, unsigned Iter, unsigned MaxIter) const override
Generate code to reduce the loop iteration by one and check if the loop is finished.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:101
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:292
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:425
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:494
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:375
friend const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
bool isInlineAsm() const
Definition: MachineInstr.h:789
static LaneBitmask getNone()
Definition: LaneBitmask.h:74
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
void dump(const TargetInstrInfo *TII=nullptr) const
reverse_iterator rbegin()
Definition: MapVector.h:58
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:756
size_type count(const KeyT &Key) const
Definition: MapVector.h:126
const Value * getValue() const
Return the base address of the memory access.
static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2)
Return true if Inst1 defines a value that is used in Inst2.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:613
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
LiveInterval & createEmptyInterval(unsigned Reg)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:191
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:360
unsigned getDepth() const
getDepth - Return the depth of this node, which is the length of the maximum path up to any node whic...
Definition: ScheduleDAG.h:417
void setLatency(unsigned Lat)
setLatency - Set the latency for this edge.
Definition: ScheduleDAG.h:144
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1724
iterator end() const
Definition: SmallPtrSet.h:405
ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of MachineInstrs. ...
Representation of each machine instruction.
Definition: MachineInstr.h:52
Basic Alias true
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:556
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:205
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:148
SUnit * getSUnit() const
Definition: ScheduleDAG.h:503
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
use_iterator use_begin(unsigned RegNo) const
This file provides utility analysis objects describing memory locations.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:424
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool hasInterval(unsigned Reg) const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
unsigned NodeNum
Definition: ScheduleDAG.h:266
iterator begin()
Definition: MapVector.h:53
unsigned getReg() const
getReg - Returns the register number.
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:55
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
addPred - This adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:65
virtual const TargetInstrInfo * getInstrInfo() const
mop_iterator operands_begin()
Definition: MachineInstr.h:295
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
vector_type::const_iterator iterator
Definition: SetVector.h:49
A vector that has set insertion semantics.
Definition: SetVector.h:41
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
SmallVector< SDep, 4 > Succs
Definition: ScheduleDAG.h:259
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:100
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
IRTranslator LLVM IR MI
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1722
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
unsigned getReg() const
getReg - Return the register associated with this edge.
Definition: ScheduleDAG.h:218
ScheduleDAGTopologicalSort is a class that computes a topological ordering for SUnits and provides me...
Definition: ScheduleDAG.h:709
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const override
Analyze the loop code, return true if it cannot be understood.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr's memory reference descriptor list.
bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const override
void dump(const ScheduleDAG *G) const
SUnit - Scheduling unit.
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:244
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:358
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.