LLVM  7.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlap loop iterations and explioits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996
21 // Conference on Parallel Architectures and Compilation Techiniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Chambpain, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and the check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/MapVector.h"
65 #include "llvm/ADT/PriorityQueue.h"
66 #include "llvm/ADT/SetVector.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/Statistic.h"
96 #include "llvm/IR/Attributes.h"
97 #include "llvm/IR/DebugLoc.h"
98 #include "llvm/IR/Function.h"
99 #include "llvm/MC/LaneBitmask.h"
100 #include "llvm/MC/MCInstrDesc.h"
102 #include "llvm/MC/MCRegisterInfo.h"
103 #include "llvm/Pass.h"
105 #include "llvm/Support/Compiler.h"
106 #include "llvm/Support/Debug.h"
107 #include "llvm/Support/MathExtras.h"
109 #include <algorithm>
110 #include <cassert>
111 #include <climits>
112 #include <cstdint>
113 #include <deque>
114 #include <functional>
115 #include <iterator>
116 #include <map>
117 #include <memory>
118 #include <tuple>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 
124 #define DEBUG_TYPE "pipeliner"
125 
126 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
127 STATISTIC(NumPipelined, "Number of loops software pipelined");
128 
129 /// A command line option to turn software pipelining on or off.
130 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
132  cl::desc("Enable Software Pipelining"));
133 
134 /// A command line option to enable SWP at -Os.
135 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
136  cl::desc("Enable SWP at Os."), cl::Hidden,
137  cl::init(false));
138 
139 /// A command line argument to limit minimum initial interval for pipelining.
140 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
141  cl::desc("Size limit for the the MII."),
142  cl::Hidden, cl::init(27));
143 
144 /// A command line argument to limit the number of stages in the pipeline.
145 static cl::opt<int>
146  SwpMaxStages("pipeliner-max-stages",
147  cl::desc("Maximum stages allowed in the generated scheduled."),
148  cl::Hidden, cl::init(3));
149 
150 /// A command line option to disable the pruning of chain dependences due to
151 /// an unrelated Phi.
152 static cl::opt<bool>
153  SwpPruneDeps("pipeliner-prune-deps",
154  cl::desc("Prune dependences between unrelated Phi nodes."),
155  cl::Hidden, cl::init(true));
156 
157 /// A command line option to disable the pruning of loop carried order
158 /// dependences.
159 static cl::opt<bool>
160  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
161  cl::desc("Prune loop carried order dependences."),
162  cl::Hidden, cl::init(true));
163 
164 #ifndef NDEBUG
165 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
166 #endif
167 
168 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
169  cl::ReallyHidden, cl::init(false),
170  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
171 
172 namespace {
173 
174 class NodeSet;
175 class SMSchedule;
176 
177 /// The main class in the implementation of the target independent
178 /// software pipeliner pass.
179 class MachinePipeliner : public MachineFunctionPass {
180 public:
181  MachineFunction *MF = nullptr;
182  const MachineLoopInfo *MLI = nullptr;
183  const MachineDominatorTree *MDT = nullptr;
184  const InstrItineraryData *InstrItins;
185  const TargetInstrInfo *TII = nullptr;
186  RegisterClassInfo RegClassInfo;
187 
188 #ifndef NDEBUG
189  static int NumTries;
190 #endif
191 
192  /// Cache the target analysis information about the loop.
193  struct LoopInfo {
194  MachineBasicBlock *TBB = nullptr;
195  MachineBasicBlock *FBB = nullptr;
197  MachineInstr *LoopInductionVar = nullptr;
198  MachineInstr *LoopCompare = nullptr;
199  };
200  LoopInfo LI;
201 
202  static char ID;
203 
204  MachinePipeliner() : MachineFunctionPass(ID) {
206  }
207 
208  bool runOnMachineFunction(MachineFunction &MF) override;
209 
210  void getAnalysisUsage(AnalysisUsage &AU) const override {
217  }
218 
219 private:
220  bool canPipelineLoop(MachineLoop &L);
221  bool scheduleLoop(MachineLoop &L);
222  bool swingModuloScheduler(MachineLoop &L);
223 };
224 
225 /// This class builds the dependence graph for the instructions in a loop,
226 /// and attempts to schedule the instructions using the SMS algorithm.
227 class SwingSchedulerDAG : public ScheduleDAGInstrs {
228  MachinePipeliner &Pass;
229  /// The minimum initiation interval between iterations for this schedule.
230  unsigned MII = 0;
231  /// Set to true if a valid pipelined schedule is found for the loop.
232  bool Scheduled = false;
233  MachineLoop &Loop;
234  LiveIntervals &LIS;
235  const RegisterClassInfo &RegClassInfo;
236 
237  /// A toplogical ordering of the SUnits, which is needed for changing
238  /// dependences and iterating over the SUnits.
240 
241  struct NodeInfo {
242  int ASAP = 0;
243  int ALAP = 0;
244 
245  NodeInfo() = default;
246  };
247  /// Computed properties for each node in the graph.
248  std::vector<NodeInfo> ScheduleInfo;
249 
250  enum OrderKind { BottomUp = 0, TopDown = 1 };
251  /// Computed node ordering for scheduling.
253 
254  using NodeSetType = SmallVector<NodeSet, 8>;
255  using ValueMapTy = DenseMap<unsigned, unsigned>;
256  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
257  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
258 
259  /// Instructions to change when emitting the final schedule.
261 
262  /// We may create a new instruction, so remember it because it
263  /// must be deleted when the pass is finished.
265 
266  /// Ordered list of DAG postprocessing steps.
267  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
268 
269  /// Helper class to implement Johnson's circuit finding algorithm.
270  class Circuits {
271  std::vector<SUnit> &SUnits;
272  SetVector<SUnit *> Stack;
273  BitVector Blocked;
276  unsigned NumPaths;
277  static unsigned MaxPaths;
278 
279  public:
280  Circuits(std::vector<SUnit> &SUs)
281  : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
282 
283  /// Reset the data structures used in the circuit algorithm.
284  void reset() {
285  Stack.clear();
286  Blocked.reset();
287  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
288  NumPaths = 0;
289  }
290 
291  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
292  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
293  void unblock(int U);
294  };
295 
296 public:
297  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
298  const RegisterClassInfo &rci)
299  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
300  RegClassInfo(rci), Topo(SUnits, &ExitSU) {
301  P.MF->getSubtarget().getSMSMutations(Mutations);
302  }
303 
304  void schedule() override;
305  void finishBlock() override;
306 
307  /// Return true if the loop kernel has been scheduled.
308  bool hasNewSchedule() { return Scheduled; }
309 
310  /// Return the earliest time an instruction may be scheduled.
311  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
312 
313  /// Return the latest time an instruction my be scheduled.
314  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
315 
316  /// The mobility function, which the the number of slots in which
317  /// an instruction may be scheduled.
318  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
319 
320  /// The depth, in the dependence graph, for a node.
321  int getDepth(SUnit *Node) { return Node->getDepth(); }
322 
323  /// The height, in the dependence graph, for a node.
324  int getHeight(SUnit *Node) { return Node->getHeight(); }
325 
326  /// Return true if the dependence is a back-edge in the data dependence graph.
327  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
328  /// using an anti dependence from a Phi to an instruction.
329  bool isBackedge(SUnit *Source, const SDep &Dep) {
330  if (Dep.getKind() != SDep::Anti)
331  return false;
332  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
333  }
334 
335  /// Return true if the dependence is an order dependence between non-Phis.
336  static bool isOrder(SUnit *Source, const SDep &Dep) {
337  if (Dep.getKind() != SDep::Order)
338  return false;
339  return (!Source->getInstr()->isPHI() &&
340  !Dep.getSUnit()->getInstr()->isPHI());
341  }
342 
343  bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
344 
345  /// The latency of the dependence.
346  unsigned getLatency(SUnit *Source, const SDep &Dep) {
347  // Anti dependences represent recurrences, so use the latency of the
348  // instruction on the back-edge.
349  if (Dep.getKind() == SDep::Anti) {
350  if (Source->getInstr()->isPHI())
351  return Dep.getSUnit()->Latency;
352  if (Dep.getSUnit()->getInstr()->isPHI())
353  return Source->Latency;
354  return Dep.getLatency();
355  }
356  return Dep.getLatency();
357  }
358 
359  /// The distance function, which indicates that operation V of iteration I
360  /// depends on operations U of iteration I-distance.
361  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
362  // Instructions that feed a Phi have a distance of 1. Computing larger
363  // values for arrays requires data dependence information.
364  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
365  return 1;
366  return 0;
367  }
368 
369  /// Set the Minimum Initiation Interval for this schedule attempt.
370  void setMII(unsigned mii) { MII = mii; }
371 
372  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
373 
374  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
375 
376  /// Return the new base register that was stored away for the changed
377  /// instruction.
378  unsigned getInstrBaseReg(SUnit *SU) {
380  InstrChanges.find(SU);
381  if (It != InstrChanges.end())
382  return It->second.first;
383  return 0;
384  }
385 
386  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
387  Mutations.push_back(std::move(Mutation));
388  }
389 
390 private:
391  void addLoopCarriedDependences(AliasAnalysis *AA);
392  void updatePhiDependences();
393  void changeDependences();
394  unsigned calculateResMII();
395  unsigned calculateRecMII(NodeSetType &RecNodeSets);
396  void findCircuits(NodeSetType &NodeSets);
397  void fuseRecs(NodeSetType &NodeSets);
398  void removeDuplicateNodes(NodeSetType &NodeSets);
399  void computeNodeFunctions(NodeSetType &NodeSets);
400  void registerPressureFilter(NodeSetType &NodeSets);
401  void colocateNodeSets(NodeSetType &NodeSets);
402  void checkNodeSets(NodeSetType &NodeSets);
403  void groupRemainingNodes(NodeSetType &NodeSets);
404  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
405  SetVector<SUnit *> &NodesAdded);
406  void computeNodeOrder(NodeSetType &NodeSets);
407  bool schedulePipeline(SMSchedule &Schedule);
408  void generatePipelinedLoop(SMSchedule &Schedule);
409  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
410  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
411  MBBVectorTy &PrologBBs);
412  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
413  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
414  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
415  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
416  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
417  SMSchedule &Schedule, ValueMapTy *VRMap,
418  InstrMapTy &InstrMap, unsigned LastStageNum,
419  unsigned CurStageNum, bool IsLast);
420  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
421  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
422  SMSchedule &Schedule, ValueMapTy *VRMap,
423  InstrMapTy &InstrMap, unsigned LastStageNum,
424  unsigned CurStageNum, bool IsLast);
425  void removeDeadInstructions(MachineBasicBlock *KernelBB,
426  MBBVectorTy &EpilogBBs);
427  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
428  SMSchedule &Schedule);
429  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
430  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
431  ValueMapTy *VRMap);
432  bool computeDelta(MachineInstr &MI, unsigned &Delta);
433  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
434  unsigned Num);
435  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
436  unsigned InstStageNum);
437  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
438  unsigned InstStageNum,
439  SMSchedule &Schedule);
440  void updateInstruction(MachineInstr *NewMI, bool LastDef,
441  unsigned CurStageNum, unsigned InstStageNum,
442  SMSchedule &Schedule, ValueMapTy *VRMap);
443  MachineInstr *findDefInLoop(unsigned Reg);
444  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
445  unsigned LoopStage, ValueMapTy *VRMap,
446  MachineBasicBlock *BB);
447  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
448  SMSchedule &Schedule, ValueMapTy *VRMap,
449  InstrMapTy &InstrMap);
450  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
451  InstrMapTy &InstrMap, unsigned CurStageNum,
452  unsigned PhiNum, MachineInstr *Phi,
453  unsigned OldReg, unsigned NewReg,
454  unsigned PrevReg = 0);
455  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
456  unsigned &OffsetPos, unsigned &NewBase,
457  int64_t &NewOffset);
458  void postprocessDAG();
459 };
460 
461 /// A NodeSet contains a set of SUnit DAG nodes with additional information
462 /// that assigns a priority to the set.
463 class NodeSet {
464  SetVector<SUnit *> Nodes;
465  bool HasRecurrence = false;
466  unsigned RecMII = 0;
467  int MaxMOV = 0;
468  int MaxDepth = 0;
469  unsigned Colocate = 0;
470  SUnit *ExceedPressure = nullptr;
471 
472 public:
473  using iterator = SetVector<SUnit *>::const_iterator;
474 
475  NodeSet() = default;
476  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
477 
478  bool insert(SUnit *SU) { return Nodes.insert(SU); }
479 
480  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
481 
482  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
483  return Nodes.remove_if(P);
484  }
485 
486  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
487 
488  bool hasRecurrence() { return HasRecurrence; };
489 
490  unsigned size() const { return Nodes.size(); }
491 
492  bool empty() const { return Nodes.empty(); }
493 
494  SUnit *getNode(unsigned i) const { return Nodes[i]; };
495 
496  void setRecMII(unsigned mii) { RecMII = mii; };
497 
498  void setColocate(unsigned c) { Colocate = c; };
499 
500  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
501 
502  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
503 
504  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
505 
506  int getRecMII() { return RecMII; }
507 
508  /// Summarize node functions for the entire node set.
509  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
510  for (SUnit *SU : *this) {
511  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
512  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
513  }
514  }
515 
516  void clear() {
517  Nodes.clear();
518  RecMII = 0;
519  HasRecurrence = false;
520  MaxMOV = 0;
521  MaxDepth = 0;
522  Colocate = 0;
523  ExceedPressure = nullptr;
524  }
525 
526  operator SetVector<SUnit *> &() { return Nodes; }
527 
528  /// Sort the node sets by importance. First, rank them by recurrence MII,
529  /// then by mobility (least mobile done first), and finally by depth.
530  /// Each node set may contain a colocate value which is used as the first
531  /// tie breaker, if it's set.
532  bool operator>(const NodeSet &RHS) const {
533  if (RecMII == RHS.RecMII) {
534  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
535  return Colocate < RHS.Colocate;
536  if (MaxMOV == RHS.MaxMOV)
537  return MaxDepth > RHS.MaxDepth;
538  return MaxMOV < RHS.MaxMOV;
539  }
540  return RecMII > RHS.RecMII;
541  }
542 
543  bool operator==(const NodeSet &RHS) const {
544  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
545  MaxDepth == RHS.MaxDepth;
546  }
547 
548  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
549 
550  iterator begin() { return Nodes.begin(); }
551  iterator end() { return Nodes.end(); }
552 
553  void print(raw_ostream &os) const {
554  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
555  << " depth " << MaxDepth << " col " << Colocate << "\n";
556  for (const auto &I : Nodes)
557  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
558  os << "\n";
559  }
560 
561 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
563 #endif
564 };
565 
566 /// This class repesents the scheduled code. The main data structure is a
567 /// map from scheduled cycle to instructions. During scheduling, the
568 /// data structure explicitly represents all stages/iterations. When
569 /// the algorithm finshes, the schedule is collapsed into a single stage,
570 /// which represents instructions from different loop iterations.
571 ///
572 /// The SMS algorithm allows negative values for cycles, so the first cycle
573 /// in the schedule is the smallest cycle value.
574 class SMSchedule {
575 private:
576  /// Map from execution cycle to instructions.
577  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
578 
579  /// Map from instruction to execution cycle.
580  std::map<SUnit *, int> InstrToCycle;
581 
582  /// Map for each register and the max difference between its uses and def.
583  /// The first element in the pair is the max difference in stages. The
584  /// second is true if the register defines a Phi value and loop value is
585  /// scheduled before the Phi.
586  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
587 
588  /// Keep track of the first cycle value in the schedule. It starts
589  /// as zero, but the algorithm allows negative values.
590  int FirstCycle = 0;
591 
592  /// Keep track of the last cycle value in the schedule.
593  int LastCycle = 0;
594 
595  /// The initiation interval (II) for the schedule.
596  int InitiationInterval = 0;
597 
598  /// Target machine information.
599  const TargetSubtargetInfo &ST;
600 
601  /// Virtual register information.
603 
604  std::unique_ptr<DFAPacketizer> Resources;
605 
606 public:
607  SMSchedule(MachineFunction *mf)
608  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
609  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
610 
611  void reset() {
612  ScheduledInstrs.clear();
613  InstrToCycle.clear();
614  RegToStageDiff.clear();
615  FirstCycle = 0;
616  LastCycle = 0;
617  InitiationInterval = 0;
618  }
619 
620  /// Set the initiation interval for this schedule.
621  void setInitiationInterval(int ii) { InitiationInterval = ii; }
622 
623  /// Return the first cycle in the completed schedule. This
624  /// can be a negative value.
625  int getFirstCycle() const { return FirstCycle; }
626 
627  /// Return the last cycle in the finalized schedule.
628  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
629 
630  /// Return the cycle of the earliest scheduled instruction in the dependence
631  /// chain.
632  int earliestCycleInChain(const SDep &Dep);
633 
634  /// Return the cycle of the latest scheduled instruction in the dependence
635  /// chain.
636  int latestCycleInChain(const SDep &Dep);
637 
638  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
639  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
640  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
641 
642  /// Iterators for the cycle to instruction map.
643  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
644  using const_sched_iterator =
645  DenseMap<int, std::deque<SUnit *>>::const_iterator;
646 
647  /// Return true if the instruction is scheduled at the specified stage.
648  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
649  return (stageScheduled(SU) == (int)StageNum);
650  }
651 
652  /// Return the stage for a scheduled instruction. Return -1 if
653  /// the instruction has not been scheduled.
654  int stageScheduled(SUnit *SU) const {
655  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
656  if (it == InstrToCycle.end())
657  return -1;
658  return (it->second - FirstCycle) / InitiationInterval;
659  }
660 
661  /// Return the cycle for a scheduled instruction. This function normalizes
662  /// the first cycle to be 0.
663  unsigned cycleScheduled(SUnit *SU) const {
664  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
665  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
666  return (it->second - FirstCycle) % InitiationInterval;
667  }
668 
669  /// Return the maximum stage count needed for this schedule.
670  unsigned getMaxStageCount() {
671  return (LastCycle - FirstCycle) / InitiationInterval;
672  }
673 
674  /// Return the max. number of stages/iterations that can occur between a
675  /// register definition and its uses.
676  unsigned getStagesForReg(int Reg, unsigned CurStage) {
677  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
678  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
679  return 1;
680  return Stages.first;
681  }
682 
683  /// The number of stages for a Phi is a little different than other
684  /// instructions. The minimum value computed in RegToStageDiff is 1
685  /// because we assume the Phi is needed for at least 1 iteration.
686  /// This is not the case if the loop value is scheduled prior to the
687  /// Phi in the same stage. This function returns the number of stages
688  /// or iterations needed between the Phi definition and any uses.
689  unsigned getStagesForPhi(int Reg) {
690  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
691  if (Stages.second)
692  return Stages.first;
693  return Stages.first - 1;
694  }
695 
696  /// Return the instructions that are scheduled at the specified cycle.
697  std::deque<SUnit *> &getInstructions(int cycle) {
698  return ScheduledInstrs[cycle];
699  }
700 
701  bool isValidSchedule(SwingSchedulerDAG *SSD);
702  void finalizeSchedule(SwingSchedulerDAG *SSD);
703  bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
704  std::deque<SUnit *> &Insts);
705  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
706  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
707  MachineOperand &MO);
708  void print(raw_ostream &os) const;
709  void dump() const;
710 };
711 
712 } // end anonymous namespace
713 
714 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
715 char MachinePipeliner::ID = 0;
716 #ifndef NDEBUG
717 int MachinePipeliner::NumTries = 0;
718 #endif
720 
721 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
722  "Modulo Software Pipelining", false, false)
727 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
728  "Modulo Software Pipelining", false, false)
729 
730 /// The "main" function for implementing Swing Modulo Scheduling.
731 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
732  if (skipFunction(mf.getFunction()))
733  return false;
734 
735  if (!EnableSWP)
736  return false;
737 
738  if (mf.getFunction().getAttributes().hasAttribute(
739  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
740  !EnableSWPOptSize.getPosition())
741  return false;
742 
743  MF = &mf;
744  MLI = &getAnalysis<MachineLoopInfo>();
745  MDT = &getAnalysis<MachineDominatorTree>();
746  TII = MF->getSubtarget().getInstrInfo();
747  RegClassInfo.runOnMachineFunction(*MF);
748 
749  for (auto &L : *MLI)
750  scheduleLoop(*L);
751 
752  return false;
753 }
754 
755 /// Attempt to perform the SMS algorithm on the specified loop. This function is
756 /// the main entry point for the algorithm. The function identifies candidate
757 /// loops, calculates the minimum initiation interval, and attempts to schedule
758 /// the loop.
759 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
760  bool Changed = false;
761  for (auto &InnerLoop : L)
762  Changed |= scheduleLoop(*InnerLoop);
763 
764 #ifndef NDEBUG
765  // Stop trying after reaching the limit (if any).
766  int Limit = SwpLoopLimit;
767  if (Limit >= 0) {
768  if (NumTries >= SwpLoopLimit)
769  return Changed;
770  NumTries++;
771  }
772 #endif
773 
774  if (!canPipelineLoop(L))
775  return Changed;
776 
777  ++NumTrytoPipeline;
778 
779  Changed = swingModuloScheduler(L);
780 
781  return Changed;
782 }
783 
784 /// Return true if the loop can be software pipelined. The algorithm is
785 /// restricted to loops with a single basic block. Make sure that the
786 /// branch in the loop can be analyzed.
787 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
788  if (L.getNumBlocks() != 1)
789  return false;
790 
791  // Check if the branch can't be understood because we can't do pipelining
792  // if that's the case.
793  LI.TBB = nullptr;
794  LI.FBB = nullptr;
795  LI.BrCond.clear();
796  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
797  return false;
798 
799  LI.LoopInductionVar = nullptr;
800  LI.LoopCompare = nullptr;
801  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
802  return false;
803 
804  if (!L.getLoopPreheader())
805  return false;
806 
807  // If any of the Phis contain subregs, then we can't pipeline
808  // because we don't know how to maintain subreg information in the
809  // VMap structure.
810  MachineBasicBlock *MBB = L.getHeader();
811  for (auto &PHI : MBB->phis())
812  for (unsigned i = 1; i != PHI.getNumOperands(); i += 2)
813  if (PHI.getOperand(i).getSubReg() != 0)
814  return false;
815 
816  return true;
817 }
818 
819 /// The SMS algorithm consists of the following main steps:
820 /// 1. Computation and analysis of the dependence graph.
821 /// 2. Ordering of the nodes (instructions).
822 /// 3. Attempt to Schedule the loop.
823 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
824  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
825 
826  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
827 
828  MachineBasicBlock *MBB = L.getHeader();
829  // The kernel should not include any terminator instructions. These
830  // will be added back later.
831  SMS.startBlock(MBB);
832 
833  // Compute the number of 'real' instructions in the basic block by
834  // ignoring terminators.
835  unsigned size = MBB->size();
837  E = MBB->instr_end();
838  I != E; ++I, --size)
839  ;
840 
841  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
842  SMS.schedule();
843  SMS.exitRegion();
844 
845  SMS.finishBlock();
846  return SMS.hasNewSchedule();
847 }
848 
849 /// We override the schedule function in ScheduleDAGInstrs to implement the
850 /// scheduling part of the Swing Modulo Scheduling algorithm.
851 void SwingSchedulerDAG::schedule() {
852  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
853  buildSchedGraph(AA);
854  addLoopCarriedDependences(AA);
855  updatePhiDependences();
857  postprocessDAG();
858  changeDependences();
859  DEBUG({
860  for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
861  SUnits[su].dumpAll(this);
862  });
863 
864  NodeSetType NodeSets;
865  findCircuits(NodeSets);
866 
867  // Calculate the MII.
868  unsigned ResMII = calculateResMII();
869  unsigned RecMII = calculateRecMII(NodeSets);
870 
871  fuseRecs(NodeSets);
872 
873  // This flag is used for testing and can cause correctness problems.
874  if (SwpIgnoreRecMII)
875  RecMII = 0;
876 
877  MII = std::max(ResMII, RecMII);
878  DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
879  << ")\n");
880 
881  // Can't schedule a loop without a valid MII.
882  if (MII == 0)
883  return;
884 
885  // Don't pipeline large loops.
886  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
887  return;
888 
889  computeNodeFunctions(NodeSets);
890 
891  registerPressureFilter(NodeSets);
892 
893  colocateNodeSets(NodeSets);
894 
895  checkNodeSets(NodeSets);
896 
897  DEBUG({
898  for (auto &I : NodeSets) {
899  dbgs() << " Rec NodeSet ";
900  I.dump();
901  }
902  });
903 
904  std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
905 
906  groupRemainingNodes(NodeSets);
907 
908  removeDuplicateNodes(NodeSets);
909 
910  DEBUG({
911  for (auto &I : NodeSets) {
912  dbgs() << " NodeSet ";
913  I.dump();
914  }
915  });
916 
917  computeNodeOrder(NodeSets);
918 
919  SMSchedule Schedule(Pass.MF);
920  Scheduled = schedulePipeline(Schedule);
921 
922  if (!Scheduled)
923  return;
924 
925  unsigned numStages = Schedule.getMaxStageCount();
926  // No need to generate pipeline if there are no overlapped iterations.
927  if (numStages == 0)
928  return;
929 
930  // Check that the maximum stage count is less than user-defined limit.
931  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
932  return;
933 
934  generatePipelinedLoop(Schedule);
935  ++NumPipelined;
936 }
937 
938 /// Clean up after the software pipeliner runs.
939 void SwingSchedulerDAG::finishBlock() {
940  for (MachineInstr *I : NewMIs)
941  MF.DeleteMachineInstr(I);
942  NewMIs.clear();
943 
944  // Call the superclass.
946 }
947 
948 /// Return the register values for the operands of a Phi instruction.
949 /// This function assume the instruction is a Phi.
950 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
951  unsigned &InitVal, unsigned &LoopVal) {
952  assert(Phi.isPHI() && "Expecting a Phi.");
953 
954  InitVal = 0;
955  LoopVal = 0;
956  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
957  if (Phi.getOperand(i + 1).getMBB() != Loop)
958  InitVal = Phi.getOperand(i).getReg();
959  else
960  LoopVal = Phi.getOperand(i).getReg();
961 
962  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
963 }
964 
965 /// Return the Phi register value that comes from the incoming block.
966 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
967  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
968  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
969  return Phi.getOperand(i).getReg();
970  return 0;
971 }
972 
973 /// Return the Phi register value that comes the the loop block.
974 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
975  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
976  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
977  return Phi.getOperand(i).getReg();
978  return 0;
979 }
980 
981 /// Return true if SUb can be reached from SUa following the chain edges.
982 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
983  SmallPtrSet<SUnit *, 8> Visited;
984  SmallVector<SUnit *, 8> Worklist;
985  Worklist.push_back(SUa);
986  while (!Worklist.empty()) {
987  const SUnit *SU = Worklist.pop_back_val();
988  for (auto &SI : SU->Succs) {
989  SUnit *SuccSU = SI.getSUnit();
990  if (SI.getKind() == SDep::Order) {
991  if (Visited.count(SuccSU))
992  continue;
993  if (SuccSU == SUb)
994  return true;
995  Worklist.push_back(SuccSU);
996  Visited.insert(SuccSU);
997  }
998  }
999  }
1000  return false;
1001 }
1002 
1003 /// Return true if the instruction causes a chain between memory
1004 /// references before and after it.
1006  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1007  (MI.hasOrderedMemoryRef() &&
1008  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1009 }
1010 
1011 /// Return the underlying objects for the memory references of an instruction.
1012 /// This function calls the code in ValueTracking, but first checks that the
1013 /// instruction has a memory operand.
1016  const DataLayout &DL) {
1017  if (!MI->hasOneMemOperand())
1018  return;
1019  MachineMemOperand *MM = *MI->memoperands_begin();
1020  if (!MM->getValue())
1021  return;
1022  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1023 }
1024 
1025 /// Add a chain edge between a load and store if the store can be an
1026 /// alias of the load on a subsequent iteration, i.e., a loop carried
1027 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1028 /// but that code doesn't create loop carried dependences.
1029 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1031  for (auto &SU : SUnits) {
1032  MachineInstr &MI = *SU.getInstr();
1033  if (isDependenceBarrier(MI, AA))
1034  PendingLoads.clear();
1035  else if (MI.mayLoad()) {
1037  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1038  for (auto V : Objs) {
1039  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1040  SUs.push_back(&SU);
1041  }
1042  } else if (MI.mayStore()) {
1044  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1045  for (auto V : Objs) {
1047  PendingLoads.find(V);
1048  if (I == PendingLoads.end())
1049  continue;
1050  for (auto Load : I->second) {
1051  if (isSuccOrder(Load, &SU))
1052  continue;
1053  MachineInstr &LdMI = *Load->getInstr();
1054  // First, perform the cheaper check that compares the base register.
1055  // If they are the same and the load offset is less than the store
1056  // offset, then mark the dependence as loop carried potentially.
1057  unsigned BaseReg1, BaseReg2;
1058  int64_t Offset1, Offset2;
1059  if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
1060  !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
1062  continue;
1063  }
1064  if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
1065  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1066  "What happened to the chain edge?");
1068  continue;
1069  }
1070  // Second, the more expensive check that uses alias analysis on the
1071  // base registers. If they alias, and the load offset is less than
1072  // the store offset, the mark the dependence as loop carried.
1073  if (!AA) {
1075  continue;
1076  }
1077  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1078  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1079  if (!MMO1->getValue() || !MMO2->getValue()) {
1081  continue;
1082  }
1083  if (MMO1->getValue() == MMO2->getValue() &&
1084  MMO1->getOffset() <= MMO2->getOffset()) {
1086  continue;
1087  }
1088  AliasResult AAResult = AA->alias(
1090  MMO1->getAAInfo()),
1092  MMO2->getAAInfo()));
1093 
1094  if (AAResult != NoAlias)
1096  }
1097  }
1098  }
1099  }
1100 }
1101 
1102 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1103 /// processes dependences for PHIs. This function adds true dependences
1104 /// from a PHI to a use, and a loop carried dependence from the use to the
1105 /// PHI. The loop carried dependence is represented as an anti dependence
1106 /// edge. This function also removes chain dependences between unrelated
1107 /// PHIs.
1108 void SwingSchedulerDAG::updatePhiDependences() {
1109  SmallVector<SDep, 4> RemoveDeps;
1111 
1112  // Iterate over each DAG node.
1113  for (SUnit &I : SUnits) {
1114  RemoveDeps.clear();
1115  // Set to true if the instruction has an operand defined by a Phi.
1116  unsigned HasPhiUse = 0;
1117  unsigned HasPhiDef = 0;
1118  MachineInstr *MI = I.getInstr();
1119  // Iterate over each operand, and we process the definitions.
1120  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1121  MOE = MI->operands_end();
1122  MOI != MOE; ++MOI) {
1123  if (!MOI->isReg())
1124  continue;
1125  unsigned Reg = MOI->getReg();
1126  if (MOI->isDef()) {
1127  // If the register is used by a Phi, then create an anti dependence.
1129  UI = MRI.use_instr_begin(Reg),
1130  UE = MRI.use_instr_end();
1131  UI != UE; ++UI) {
1132  MachineInstr *UseMI = &*UI;
1133  SUnit *SU = getSUnit(UseMI);
1134  if (SU != nullptr && UseMI->isPHI()) {
1135  if (!MI->isPHI()) {
1136  SDep Dep(SU, SDep::Anti, Reg);
1137  I.addPred(Dep);
1138  } else {
1139  HasPhiDef = Reg;
1140  // Add a chain edge to a dependent Phi that isn't an existing
1141  // predecessor.
1142  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1143  I.addPred(SDep(SU, SDep::Barrier));
1144  }
1145  }
1146  }
1147  } else if (MOI->isUse()) {
1148  // If the register is defined by a Phi, then create a true dependence.
1149  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1150  if (DefMI == nullptr)
1151  continue;
1152  SUnit *SU = getSUnit(DefMI);
1153  if (SU != nullptr && DefMI->isPHI()) {
1154  if (!MI->isPHI()) {
1155  SDep Dep(SU, SDep::Data, Reg);
1156  Dep.setLatency(0);
1157  ST.adjustSchedDependency(SU, &I, Dep);
1158  I.addPred(Dep);
1159  } else {
1160  HasPhiUse = Reg;
1161  // Add a chain edge to a dependent Phi that isn't an existing
1162  // predecessor.
1163  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1164  I.addPred(SDep(SU, SDep::Barrier));
1165  }
1166  }
1167  }
1168  }
1169  // Remove order dependences from an unrelated Phi.
1170  if (!SwpPruneDeps)
1171  continue;
1172  for (auto &PI : I.Preds) {
1173  MachineInstr *PMI = PI.getSUnit()->getInstr();
1174  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1175  if (I.getInstr()->isPHI()) {
1176  if (PMI->getOperand(0).getReg() == HasPhiUse)
1177  continue;
1178  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1179  continue;
1180  }
1181  RemoveDeps.push_back(PI);
1182  }
1183  }
1184  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1185  I.removePred(RemoveDeps[i]);
1186  }
1187 }
1188 
1189 /// Iterate over each DAG node and see if we can change any dependences
1190 /// in order to reduce the recurrence MII.
1191 void SwingSchedulerDAG::changeDependences() {
1192  // See if an instruction can use a value from the previous iteration.
1193  // If so, we update the base and offset of the instruction and change
1194  // the dependences.
1195  for (SUnit &I : SUnits) {
1196  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1197  int64_t NewOffset = 0;
1198  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1199  NewOffset))
1200  continue;
1201 
1202  // Get the MI and SUnit for the instruction that defines the original base.
1203  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1204  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1205  if (!DefMI)
1206  continue;
1207  SUnit *DefSU = getSUnit(DefMI);
1208  if (!DefSU)
1209  continue;
1210  // Get the MI and SUnit for the instruction that defins the new base.
1211  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1212  if (!LastMI)
1213  continue;
1214  SUnit *LastSU = getSUnit(LastMI);
1215  if (!LastSU)
1216  continue;
1217 
1218  if (Topo.IsReachable(&I, LastSU))
1219  continue;
1220 
1221  // Remove the dependence. The value now depends on a prior iteration.
1222  SmallVector<SDep, 4> Deps;
1223  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1224  ++P)
1225  if (P->getSUnit() == DefSU)
1226  Deps.push_back(*P);
1227  for (int i = 0, e = Deps.size(); i != e; i++) {
1228  Topo.RemovePred(&I, Deps[i].getSUnit());
1229  I.removePred(Deps[i]);
1230  }
1231  // Remove the chain dependence between the instructions.
1232  Deps.clear();
1233  for (auto &P : LastSU->Preds)
1234  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1235  Deps.push_back(P);
1236  for (int i = 0, e = Deps.size(); i != e; i++) {
1237  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1238  LastSU->removePred(Deps[i]);
1239  }
1240 
1241  // Add a dependence between the new instruction and the instruction
1242  // that defines the new base.
1243  SDep Dep(&I, SDep::Anti, NewBase);
1244  LastSU->addPred(Dep);
1245 
1246  // Remember the base and offset information so that we can update the
1247  // instruction during code generation.
1248  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1249  }
1250 }
1251 
1252 namespace {
1253 
1254 // FuncUnitSorter - Comparison operator used to sort instructions by
1255 // the number of functional unit choices.
1256 struct FuncUnitSorter {
1257  const InstrItineraryData *InstrItins;
1258  DenseMap<unsigned, unsigned> Resources;
1259 
1260  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1261 
1262  // Compute the number of functional unit alternatives needed
1263  // at each stage, and take the minimum value. We prioritize the
1264  // instructions by the least number of choices first.
1265  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1266  unsigned schedClass = Inst->getDesc().getSchedClass();
1267  unsigned min = UINT_MAX;
1268  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1269  *IE = InstrItins->endStage(schedClass);
1270  IS != IE; ++IS) {
1271  unsigned funcUnits = IS->getUnits();
1272  unsigned numAlternatives = countPopulation(funcUnits);
1273  if (numAlternatives < min) {
1274  min = numAlternatives;
1275  F = funcUnits;
1276  }
1277  }
1278  return min;
1279  }
1280 
1281  // Compute the critical resources needed by the instruction. This
1282  // function records the functional units needed by instructions that
1283  // must use only one functional unit. We use this as a tie breaker
1284  // for computing the resource MII. The instrutions that require
1285  // the same, highly used, functional unit have high priority.
1286  void calcCriticalResources(MachineInstr &MI) {
1287  unsigned SchedClass = MI.getDesc().getSchedClass();
1288  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1289  *IE = InstrItins->endStage(SchedClass);
1290  IS != IE; ++IS) {
1291  unsigned FuncUnits = IS->getUnits();
1292  if (countPopulation(FuncUnits) == 1)
1293  Resources[FuncUnits]++;
1294  }
1295  }
1296 
1297  /// Return true if IS1 has less priority than IS2.
1298  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1299  unsigned F1 = 0, F2 = 0;
1300  unsigned MFUs1 = minFuncUnits(IS1, F1);
1301  unsigned MFUs2 = minFuncUnits(IS2, F2);
1302  if (MFUs1 == 1 && MFUs2 == 1)
1303  return Resources.lookup(F1) < Resources.lookup(F2);
1304  return MFUs1 > MFUs2;
1305  }
1306 };
1307 
1308 } // end anonymous namespace
1309 
1310 /// Calculate the resource constrained minimum initiation interval for the
1311 /// specified loop. We use the DFA to model the resources needed for
1312 /// each instruction, and we ignore dependences. A different DFA is created
1313 /// for each cycle that is required. When adding a new instruction, we attempt
1314 /// to add it to each existing DFA, until a legal space is found. If the
1315 /// instruction cannot be reserved in an existing DFA, we create a new one.
1316 unsigned SwingSchedulerDAG::calculateResMII() {
1318  MachineBasicBlock *MBB = Loop.getHeader();
1319  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1320 
1321  // Sort the instructions by the number of available choices for scheduling,
1322  // least to most. Use the number of critical resources as the tie breaker.
1323  FuncUnitSorter FUS =
1324  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1326  E = MBB->getFirstTerminator();
1327  I != E; ++I)
1328  FUS.calcCriticalResources(*I);
1330  FuncUnitOrder(FUS);
1331 
1333  E = MBB->getFirstTerminator();
1334  I != E; ++I)
1335  FuncUnitOrder.push(&*I);
1336 
1337  while (!FuncUnitOrder.empty()) {
1338  MachineInstr *MI = FuncUnitOrder.top();
1339  FuncUnitOrder.pop();
1340  if (TII->isZeroCost(MI->getOpcode()))
1341  continue;
1342  // Attempt to reserve the instruction in an existing DFA. At least one
1343  // DFA is needed for each cycle.
1344  unsigned NumCycles = getSUnit(MI)->Latency;
1345  unsigned ReservedCycles = 0;
1348  for (unsigned C = 0; C < NumCycles; ++C)
1349  while (RI != RE) {
1350  if ((*RI++)->canReserveResources(*MI)) {
1351  ++ReservedCycles;
1352  break;
1353  }
1354  }
1355  // Start reserving resources using existing DFAs.
1356  for (unsigned C = 0; C < ReservedCycles; ++C) {
1357  --RI;
1358  (*RI)->reserveResources(*MI);
1359  }
1360  // Add new DFAs, if needed, to reserve resources.
1361  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1362  DFAPacketizer *NewResource =
1364  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1365  NewResource->reserveResources(*MI);
1366  Resources.push_back(NewResource);
1367  }
1368  }
1369  int Resmii = Resources.size();
1370  // Delete the memory for each of the DFAs that were created earlier.
1371  for (DFAPacketizer *RI : Resources) {
1372  DFAPacketizer *D = RI;
1373  delete D;
1374  }
1375  Resources.clear();
1376  return Resmii;
1377 }
1378 
1379 /// Calculate the recurrence-constrainted minimum initiation interval.
1380 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1381 /// for each circuit. The II needs to satisfy the inequality
1382 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1383 /// II that satistifies the inequality, and the RecMII is the maximum
1384 /// of those values.
1385 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1386  unsigned RecMII = 0;
1387 
1388  for (NodeSet &Nodes : NodeSets) {
1389  if (Nodes.empty())
1390  continue;
1391 
1392  unsigned Delay = Nodes.size() - 1;
1393  unsigned Distance = 1;
1394 
1395  // ii = ceil(delay / distance)
1396  unsigned CurMII = (Delay + Distance - 1) / Distance;
1397  Nodes.setRecMII(CurMII);
1398  if (CurMII > RecMII)
1399  RecMII = CurMII;
1400  }
1401 
1402  return RecMII;
1403 }
1404 
1405 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1406 /// but we do this to find the circuits, and then change them back.
1407 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1409  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1410  SUnit *SU = &SUnits[i];
1411  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1412  IP != EP; ++IP) {
1413  if (IP->getKind() != SDep::Anti)
1414  continue;
1415  DepsAdded.push_back(std::make_pair(SU, *IP));
1416  }
1417  }
1418  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1419  E = DepsAdded.end();
1420  I != E; ++I) {
1421  // Remove this anti dependency and add one in the reverse direction.
1422  SUnit *SU = I->first;
1423  SDep &D = I->second;
1424  SUnit *TargetSU = D.getSUnit();
1425  unsigned Reg = D.getReg();
1426  unsigned Lat = D.getLatency();
1427  SU->removePred(D);
1428  SDep Dep(SU, SDep::Anti, Reg);
1429  Dep.setLatency(Lat);
1430  TargetSU->addPred(Dep);
1431  }
1432 }
1433 
1434 /// Create the adjacency structure of the nodes in the graph.
1435 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1436  SwingSchedulerDAG *DAG) {
1437  BitVector Added(SUnits.size());
1438  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1439  Added.reset();
1440  // Add any successor to the adjacency matrix and exclude duplicates.
1441  for (auto &SI : SUnits[i].Succs) {
1442  // Do not process a boundary node and a back-edge is processed only
1443  // if it goes to a Phi.
1444  if (SI.getSUnit()->isBoundaryNode() ||
1445  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1446  continue;
1447  int N = SI.getSUnit()->NodeNum;
1448  if (!Added.test(N)) {
1449  AdjK[i].push_back(N);
1450  Added.set(N);
1451  }
1452  }
1453  // A chain edge between a store and a load is treated as a back-edge in the
1454  // adjacency matrix.
1455  for (auto &PI : SUnits[i].Preds) {
1456  if (!SUnits[i].getInstr()->mayStore() ||
1457  !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
1458  continue;
1459  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1460  int N = PI.getSUnit()->NodeNum;
1461  if (!Added.test(N)) {
1462  AdjK[i].push_back(N);
1463  Added.set(N);
1464  }
1465  }
1466  }
1467  }
1468 }
1469 
1470 /// Identify an elementary circuit in the dependence graph starting at the
1471 /// specified node.
1472 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1473  bool HasBackedge) {
1474  SUnit *SV = &SUnits[V];
1475  bool F = false;
1476  Stack.insert(SV);
1477  Blocked.set(V);
1478 
1479  for (auto W : AdjK[V]) {
1480  if (NumPaths > MaxPaths)
1481  break;
1482  if (W < S)
1483  continue;
1484  if (W == S) {
1485  if (!HasBackedge)
1486  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1487  F = true;
1488  ++NumPaths;
1489  break;
1490  } else if (!Blocked.test(W)) {
1491  if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
1492  F = true;
1493  }
1494  }
1495 
1496  if (F)
1497  unblock(V);
1498  else {
1499  for (auto W : AdjK[V]) {
1500  if (W < S)
1501  continue;
1502  if (B[W].count(SV) == 0)
1503  B[W].insert(SV);
1504  }
1505  }
1506  Stack.pop_back();
1507  return F;
1508 }
1509 
1510 /// Unblock a node in the circuit finding algorithm.
1511 void SwingSchedulerDAG::Circuits::unblock(int U) {
1512  Blocked.reset(U);
1513  SmallPtrSet<SUnit *, 4> &BU = B[U];
1514  while (!BU.empty()) {
1516  assert(SI != BU.end() && "Invalid B set.");
1517  SUnit *W = *SI;
1518  BU.erase(W);
1519  if (Blocked.test(W->NodeNum))
1520  unblock(W->NodeNum);
1521  }
1522 }
1523 
1524 /// Identify all the elementary circuits in the dependence graph using
1525 /// Johnson's circuit algorithm.
1526 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1527  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1528  // but we do this to find the circuits, and then change them back.
1529  swapAntiDependences(SUnits);
1530 
1531  Circuits Cir(SUnits);
1532  // Create the adjacency structure.
1533  Cir.createAdjacencyStructure(this);
1534  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1535  Cir.reset();
1536  Cir.circuit(i, i, NodeSets);
1537  }
1538 
1539  // Change the dependences back so that we've created a DAG again.
1540  swapAntiDependences(SUnits);
1541 }
1542 
1543 /// Return true for DAG nodes that we ignore when computing the cost functions.
1544 /// We ignore the back-edge recurrence in order to avoid unbounded recurison
1545 /// in the calculation of the ASAP, ALAP, etc functions.
1546 static bool ignoreDependence(const SDep &D, bool isPred) {
1547  if (D.isArtificial())
1548  return true;
1549  return D.getKind() == SDep::Anti && isPred;
1550 }
1551 
1552 /// Compute several functions need to order the nodes for scheduling.
1553 /// ASAP - Earliest time to schedule a node.
1554 /// ALAP - Latest time to schedule a node.
1555 /// MOV - Mobility function, difference between ALAP and ASAP.
1556 /// D - Depth of each node.
1557 /// H - Height of each node.
1558 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1559  ScheduleInfo.resize(SUnits.size());
1560 
1561  DEBUG({
1563  E = Topo.end();
1564  I != E; ++I) {
1565  SUnit *SU = &SUnits[*I];
1566  SU->dump(this);
1567  }
1568  });
1569 
1570  int maxASAP = 0;
1571  // Compute ASAP.
1573  E = Topo.end();
1574  I != E; ++I) {
1575  int asap = 0;
1576  SUnit *SU = &SUnits[*I];
1577  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1578  EP = SU->Preds.end();
1579  IP != EP; ++IP) {
1580  if (ignoreDependence(*IP, true))
1581  continue;
1582  SUnit *pred = IP->getSUnit();
1583  asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
1584  getDistance(pred, SU, *IP) * MII));
1585  }
1586  maxASAP = std::max(maxASAP, asap);
1587  ScheduleInfo[*I].ASAP = asap;
1588  }
1589 
1590  // Compute ALAP and MOV.
1592  E = Topo.rend();
1593  I != E; ++I) {
1594  int alap = maxASAP;
1595  SUnit *SU = &SUnits[*I];
1596  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1597  ES = SU->Succs.end();
1598  IS != ES; ++IS) {
1599  if (ignoreDependence(*IS, true))
1600  continue;
1601  SUnit *succ = IS->getSUnit();
1602  alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
1603  getDistance(SU, succ, *IS) * MII));
1604  }
1605 
1606  ScheduleInfo[*I].ALAP = alap;
1607  }
1608 
1609  // After computing the node functions, compute the summary for each node set.
1610  for (NodeSet &I : NodeSets)
1611  I.computeNodeSetInfo(this);
1612 
1613  DEBUG({
1614  for (unsigned i = 0; i < SUnits.size(); i++) {
1615  dbgs() << "\tNode " << i << ":\n";
1616  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1617  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1618  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1619  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1620  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1621  }
1622  });
1623 }
1624 
1625 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1626 /// as the predecessors of the elements of NodeOrder that are not also in
1627 /// NodeOrder.
1628 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1630  const NodeSet *S = nullptr) {
1631  Preds.clear();
1632  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1633  I != E; ++I) {
1634  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1635  PI != PE; ++PI) {
1636  if (S && S->count(PI->getSUnit()) == 0)
1637  continue;
1638  if (ignoreDependence(*PI, true))
1639  continue;
1640  if (NodeOrder.count(PI->getSUnit()) == 0)
1641  Preds.insert(PI->getSUnit());
1642  }
1643  // Back-edges are predecessors with an anti-dependence.
1644  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1645  ES = (*I)->Succs.end();
1646  IS != ES; ++IS) {
1647  if (IS->getKind() != SDep::Anti)
1648  continue;
1649  if (S && S->count(IS->getSUnit()) == 0)
1650  continue;
1651  if (NodeOrder.count(IS->getSUnit()) == 0)
1652  Preds.insert(IS->getSUnit());
1653  }
1654  }
1655  return !Preds.empty();
1656 }
1657 
1658 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1659 /// as the successors of the elements of NodeOrder that are not also in
1660 /// NodeOrder.
1661 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1663  const NodeSet *S = nullptr) {
1664  Succs.clear();
1665  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1666  I != E; ++I) {
1667  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1668  SI != SE; ++SI) {
1669  if (S && S->count(SI->getSUnit()) == 0)
1670  continue;
1671  if (ignoreDependence(*SI, false))
1672  continue;
1673  if (NodeOrder.count(SI->getSUnit()) == 0)
1674  Succs.insert(SI->getSUnit());
1675  }
1676  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1677  PE = (*I)->Preds.end();
1678  PI != PE; ++PI) {
1679  if (PI->getKind() != SDep::Anti)
1680  continue;
1681  if (S && S->count(PI->getSUnit()) == 0)
1682  continue;
1683  if (NodeOrder.count(PI->getSUnit()) == 0)
1684  Succs.insert(PI->getSUnit());
1685  }
1686  }
1687  return !Succs.empty();
1688 }
1689 
1690 /// Return true if there is a path from the specified node to any of the nodes
1691 /// in DestNodes. Keep track and return the nodes in any path.
1692 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1693  SetVector<SUnit *> &DestNodes,
1694  SetVector<SUnit *> &Exclude,
1695  SmallPtrSet<SUnit *, 8> &Visited) {
1696  if (Cur->isBoundaryNode())
1697  return false;
1698  if (Exclude.count(Cur) != 0)
1699  return false;
1700  if (DestNodes.count(Cur) != 0)
1701  return true;
1702  if (!Visited.insert(Cur).second)
1703  return Path.count(Cur) != 0;
1704  bool FoundPath = false;
1705  for (auto &SI : Cur->Succs)
1706  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1707  for (auto &PI : Cur->Preds)
1708  if (PI.getKind() == SDep::Anti)
1709  FoundPath |=
1710  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1711  if (FoundPath)
1712  Path.insert(Cur);
1713  return FoundPath;
1714 }
1715 
1716 /// Return true if Set1 is a subset of Set2.
1717 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1718  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1719  if (Set2.count(*I) == 0)
1720  return false;
1721  return true;
1722 }
1723 
1724 /// Compute the live-out registers for the instructions in a node-set.
1725 /// The live-out registers are those that are defined in the node-set,
1726 /// but not used. Except for use operands of Phis.
1728  NodeSet &NS) {
1729  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1730  MachineRegisterInfo &MRI = MF.getRegInfo();
1732  SmallSet<unsigned, 4> Uses;
1733  for (SUnit *SU : NS) {
1734  const MachineInstr *MI = SU->getInstr();
1735  if (MI->isPHI())
1736  continue;
1737  for (const MachineOperand &MO : MI->operands())
1738  if (MO.isReg() && MO.isUse()) {
1739  unsigned Reg = MO.getReg();
1741  Uses.insert(Reg);
1742  else if (MRI.isAllocatable(Reg))
1743  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1744  Uses.insert(*Units);
1745  }
1746  }
1747  for (SUnit *SU : NS)
1748  for (const MachineOperand &MO : SU->getInstr()->operands())
1749  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1750  unsigned Reg = MO.getReg();
1752  if (!Uses.count(Reg))
1753  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1755  } else if (MRI.isAllocatable(Reg)) {
1756  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1757  if (!Uses.count(*Units))
1758  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1760  }
1761  }
1762  RPTracker.addLiveRegs(LiveOutRegs);
1763 }
1764 
1765 /// A heuristic to filter nodes in recurrent node-sets if the register
1766 /// pressure of a set is too high.
1767 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1768  for (auto &NS : NodeSets) {
1769  // Skip small node-sets since they won't cause register pressure problems.
1770  if (NS.size() <= 2)
1771  continue;
1772  IntervalPressure RecRegPressure;
1773  RegPressureTracker RecRPTracker(RecRegPressure);
1774  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1775  computeLiveOuts(MF, RecRPTracker, NS);
1776  RecRPTracker.closeBottom();
1777 
1778  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1779  std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
1780  return A->NodeNum > B->NodeNum;
1781  });
1782 
1783  for (auto &SU : SUnits) {
1784  // Since we're computing the register pressure for a subset of the
1785  // instructions in a block, we need to set the tracker for each
1786  // instruction in the node-set. The tracker is set to the instruction
1787  // just after the one we're interested in.
1789  RecRPTracker.setPos(std::next(CurInstI));
1790 
1791  RegPressureDelta RPDelta;
1792  ArrayRef<PressureChange> CriticalPSets;
1793  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1794  CriticalPSets,
1795  RecRegPressure.MaxSetPressure);
1796  if (RPDelta.Excess.isValid()) {
1797  DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1798  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1799  << ":" << RPDelta.Excess.getUnitInc());
1800  NS.setExceedPressure(SU);
1801  break;
1802  }
1803  RecRPTracker.recede();
1804  }
1805  }
1806 }
1807 
1808 /// A heuristic to colocate node sets that have the same set of
1809 /// successors.
1810 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1811  unsigned Colocate = 0;
1812  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1813  NodeSet &N1 = NodeSets[i];
1815  if (N1.empty() || !succ_L(N1, S1))
1816  continue;
1817  for (int j = i + 1; j < e; ++j) {
1818  NodeSet &N2 = NodeSets[j];
1819  if (N1.compareRecMII(N2) != 0)
1820  continue;
1822  if (N2.empty() || !succ_L(N2, S2))
1823  continue;
1824  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1825  N1.setColocate(++Colocate);
1826  N2.setColocate(Colocate);
1827  break;
1828  }
1829  }
1830  }
1831 }
1832 
1833 /// Check if the existing node-sets are profitable. If not, then ignore the
1834 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1835 /// a heuristic. If the MII is large and there is a non-recurrent node with
1836 /// a large depth compared to the MII, then it's best to try and schedule
1837 /// all instruction together instead of starting with the recurrent node-sets.
1838 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1839  // Look for loops with a large MII.
1840  if (MII <= 20)
1841  return;
1842  // Check if the node-set contains only a simple add recurrence.
1843  for (auto &NS : NodeSets)
1844  if (NS.size() > 2)
1845  return;
1846  // If the depth of any instruction is significantly larger than the MII, then
1847  // ignore the recurrent node-sets and treat all instructions equally.
1848  for (auto &SU : SUnits)
1849  if (SU.getDepth() > MII * 1.5) {
1850  NodeSets.clear();
1851  DEBUG(dbgs() << "Clear recurrence node-sets\n");
1852  return;
1853  }
1854 }
1855 
1856 /// Add the nodes that do not belong to a recurrence set into groups
1857 /// based upon connected componenets.
1858 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1859  SetVector<SUnit *> NodesAdded;
1860  SmallPtrSet<SUnit *, 8> Visited;
1861  // Add the nodes that are on a path between the previous node sets and
1862  // the current node set.
1863  for (NodeSet &I : NodeSets) {
1865  // Add the nodes from the current node set to the previous node set.
1866  if (succ_L(I, N)) {
1867  SetVector<SUnit *> Path;
1868  for (SUnit *NI : N) {
1869  Visited.clear();
1870  computePath(NI, Path, NodesAdded, I, Visited);
1871  }
1872  if (!Path.empty())
1873  I.insert(Path.begin(), Path.end());
1874  }
1875  // Add the nodes from the previous node set to the current node set.
1876  N.clear();
1877  if (succ_L(NodesAdded, N)) {
1878  SetVector<SUnit *> Path;
1879  for (SUnit *NI : N) {
1880  Visited.clear();
1881  computePath(NI, Path, I, NodesAdded, Visited);
1882  }
1883  if (!Path.empty())
1884  I.insert(Path.begin(), Path.end());
1885  }
1886  NodesAdded.insert(I.begin(), I.end());
1887  }
1888 
1889  // Create a new node set with the connected nodes of any successor of a node
1890  // in a recurrent set.
1891  NodeSet NewSet;
1893  if (succ_L(NodesAdded, N))
1894  for (SUnit *I : N)
1895  addConnectedNodes(I, NewSet, NodesAdded);
1896  if (!NewSet.empty())
1897  NodeSets.push_back(NewSet);
1898 
1899  // Create a new node set with the connected nodes of any predecessor of a node
1900  // in a recurrent set.
1901  NewSet.clear();
1902  if (pred_L(NodesAdded, N))
1903  for (SUnit *I : N)
1904  addConnectedNodes(I, NewSet, NodesAdded);
1905  if (!NewSet.empty())
1906  NodeSets.push_back(NewSet);
1907 
1908  // Create new nodes sets with the connected nodes any any remaining node that
1909  // has no predecessor.
1910  for (unsigned i = 0; i < SUnits.size(); ++i) {
1911  SUnit *SU = &SUnits[i];
1912  if (NodesAdded.count(SU) == 0) {
1913  NewSet.clear();
1914  addConnectedNodes(SU, NewSet, NodesAdded);
1915  if (!NewSet.empty())
1916  NodeSets.push_back(NewSet);
1917  }
1918  }
1919 }
1920 
1921 /// Add the node to the set, and add all is its connected nodes to the set.
1922 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1923  SetVector<SUnit *> &NodesAdded) {
1924  NewSet.insert(SU);
1925  NodesAdded.insert(SU);
1926  for (auto &SI : SU->Succs) {
1927  SUnit *Successor = SI.getSUnit();
1928  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1929  addConnectedNodes(Successor, NewSet, NodesAdded);
1930  }
1931  for (auto &PI : SU->Preds) {
1932  SUnit *Predecessor = PI.getSUnit();
1933  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1934  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1935  }
1936 }
1937 
1938 /// Return true if Set1 contains elements in Set2. The elements in common
1939 /// are returned in a different container.
1940 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1941  SmallSetVector<SUnit *, 8> &Result) {
1942  Result.clear();
1943  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1944  SUnit *SU = Set1[i];
1945  if (Set2.count(SU) != 0)
1946  Result.insert(SU);
1947  }
1948  return !Result.empty();
1949 }
1950 
1951 /// Merge the recurrence node sets that have the same initial node.
1952 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1953  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1954  ++I) {
1955  NodeSet &NI = *I;
1956  for (NodeSetType::iterator J = I + 1; J != E;) {
1957  NodeSet &NJ = *J;
1958  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1959  if (NJ.compareRecMII(NI) > 0)
1960  NI.setRecMII(NJ.getRecMII());
1961  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1962  ++NII)
1963  I->insert(*NII);
1964  NodeSets.erase(J);
1965  E = NodeSets.end();
1966  } else {
1967  ++J;
1968  }
1969  }
1970  }
1971 }
1972 
1973 /// Remove nodes that have been scheduled in previous NodeSets.
1974 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1975  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1976  ++I)
1977  for (NodeSetType::iterator J = I + 1; J != E;) {
1978  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1979 
1980  if (J->empty()) {
1981  NodeSets.erase(J);
1982  E = NodeSets.end();
1983  } else {
1984  ++J;
1985  }
1986  }
1987 }
1988 
1989 /// Return true if Inst1 defines a value that is used in Inst2.
1990 static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
1991  for (auto &SI : Inst1->Succs)
1992  if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
1993  return true;
1994  return false;
1995 }
1996 
1997 /// Compute an ordered list of the dependence graph nodes, which
1998 /// indicates the order that the nodes will be scheduled. This is a
1999 /// two-level algorithm. First, a partial order is created, which
2000 /// consists of a list of sets ordered from highest to lowest priority.
2001 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2003  NodeOrder.clear();
2004 
2005  for (auto &Nodes : NodeSets) {
2006  DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2007  OrderKind Order;
2009  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2010  R.insert(N.begin(), N.end());
2011  Order = BottomUp;
2012  DEBUG(dbgs() << " Bottom up (preds) ");
2013  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2014  R.insert(N.begin(), N.end());
2015  Order = TopDown;
2016  DEBUG(dbgs() << " Top down (succs) ");
2017  } else if (isIntersect(N, Nodes, R)) {
2018  // If some of the successors are in the existing node-set, then use the
2019  // top-down ordering.
2020  Order = TopDown;
2021  DEBUG(dbgs() << " Top down (intersect) ");
2022  } else if (NodeSets.size() == 1) {
2023  for (auto &N : Nodes)
2024  if (N->Succs.size() == 0)
2025  R.insert(N);
2026  Order = BottomUp;
2027  DEBUG(dbgs() << " Bottom up (all) ");
2028  } else {
2029  // Find the node with the highest ASAP.
2030  SUnit *maxASAP = nullptr;
2031  for (SUnit *SU : Nodes) {
2032  if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
2033  maxASAP = SU;
2034  }
2035  R.insert(maxASAP);
2036  Order = BottomUp;
2037  DEBUG(dbgs() << " Bottom up (default) ");
2038  }
2039 
2040  while (!R.empty()) {
2041  if (Order == TopDown) {
2042  // Choose the node with the maximum height. If more than one, choose
2043  // the node with the lowest MOV. If still more than one, check if there
2044  // is a dependence between the instructions.
2045  while (!R.empty()) {
2046  SUnit *maxHeight = nullptr;
2047  for (SUnit *I : R) {
2048  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2049  maxHeight = I;
2050  else if (getHeight(I) == getHeight(maxHeight) &&
2051  getMOV(I) < getMOV(maxHeight) &&
2052  !hasDataDependence(maxHeight, I))
2053  maxHeight = I;
2054  else if (hasDataDependence(I, maxHeight))
2055  maxHeight = I;
2056  }
2057  NodeOrder.insert(maxHeight);
2058  DEBUG(dbgs() << maxHeight->NodeNum << " ");
2059  R.remove(maxHeight);
2060  for (const auto &I : maxHeight->Succs) {
2061  if (Nodes.count(I.getSUnit()) == 0)
2062  continue;
2063  if (NodeOrder.count(I.getSUnit()) != 0)
2064  continue;
2065  if (ignoreDependence(I, false))
2066  continue;
2067  R.insert(I.getSUnit());
2068  }
2069  // Back-edges are predecessors with an anti-dependence.
2070  for (const auto &I : maxHeight->Preds) {
2071  if (I.getKind() != SDep::Anti)
2072  continue;
2073  if (Nodes.count(I.getSUnit()) == 0)
2074  continue;
2075  if (NodeOrder.count(I.getSUnit()) != 0)
2076  continue;
2077  R.insert(I.getSUnit());
2078  }
2079  }
2080  Order = BottomUp;
2081  DEBUG(dbgs() << "\n Switching order to bottom up ");
2083  if (pred_L(NodeOrder, N, &Nodes))
2084  R.insert(N.begin(), N.end());
2085  } else {
2086  // Choose the node with the maximum depth. If more than one, choose
2087  // the node with the lowest MOV. If there is still more than one, check
2088  // for a dependence between the instructions.
2089  while (!R.empty()) {
2090  SUnit *maxDepth = nullptr;
2091  for (SUnit *I : R) {
2092  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2093  maxDepth = I;
2094  else if (getDepth(I) == getDepth(maxDepth) &&
2095  getMOV(I) < getMOV(maxDepth) &&
2096  !hasDataDependence(I, maxDepth))
2097  maxDepth = I;
2098  else if (hasDataDependence(maxDepth, I))
2099  maxDepth = I;
2100  }
2101  NodeOrder.insert(maxDepth);
2102  DEBUG(dbgs() << maxDepth->NodeNum << " ");
2103  R.remove(maxDepth);
2104  if (Nodes.isExceedSU(maxDepth)) {
2105  Order = TopDown;
2106  R.clear();
2107  R.insert(Nodes.getNode(0));
2108  break;
2109  }
2110  for (const auto &I : maxDepth->Preds) {
2111  if (Nodes.count(I.getSUnit()) == 0)
2112  continue;
2113  if (NodeOrder.count(I.getSUnit()) != 0)
2114  continue;
2115  if (I.getKind() == SDep::Anti)
2116  continue;
2117  R.insert(I.getSUnit());
2118  }
2119  // Back-edges are predecessors with an anti-dependence.
2120  for (const auto &I : maxDepth->Succs) {
2121  if (I.getKind() != SDep::Anti)
2122  continue;
2123  if (Nodes.count(I.getSUnit()) == 0)
2124  continue;
2125  if (NodeOrder.count(I.getSUnit()) != 0)
2126  continue;
2127  R.insert(I.getSUnit());
2128  }
2129  }
2130  Order = TopDown;
2131  DEBUG(dbgs() << "\n Switching order to top down ");
2133  if (succ_L(NodeOrder, N, &Nodes))
2134  R.insert(N.begin(), N.end());
2135  }
2136  }
2137  DEBUG(dbgs() << "\nDone with Nodeset\n");
2138  }
2139 
2140  DEBUG({
2141  dbgs() << "Node order: ";
2142  for (SUnit *I : NodeOrder)
2143  dbgs() << " " << I->NodeNum << " ";
2144  dbgs() << "\n";
2145  });
2146 }
2147 
2148 /// Process the nodes in the computed order and create the pipelined schedule
2149 /// of the instructions, if possible. Return true if a schedule is found.
2150 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2151  if (NodeOrder.empty())
2152  return false;
2153 
2154  bool scheduleFound = false;
2155  // Keep increasing II until a valid schedule is found.
2156  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2157  Schedule.reset();
2158  Schedule.setInitiationInterval(II);
2159  DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2160 
2161  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2162  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2163  do {
2164  SUnit *SU = *NI;
2165 
2166  // Compute the schedule time for the instruction, which is based
2167  // upon the scheduled time for any predecessors/successors.
2168  int EarlyStart = INT_MIN;
2169  int LateStart = INT_MAX;
2170  // These values are set when the size of the schedule window is limited
2171  // due to chain dependences.
2172  int SchedEnd = INT_MAX;
2173  int SchedStart = INT_MIN;
2174  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2175  II, this);
2176  DEBUG({
2177  dbgs() << "Inst (" << SU->NodeNum << ") ";
2178  SU->getInstr()->dump();
2179  dbgs() << "\n";
2180  });
2181  DEBUG({
2182  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2183  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2184  });
2185 
2186  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2187  SchedStart > LateStart)
2188  scheduleFound = false;
2189  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2190  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2191  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2192  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2193  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2194  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2195  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2196  SchedEnd =
2197  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2198  // When scheduling a Phi it is better to start at the late cycle and go
2199  // backwards. The default order may insert the Phi too far away from
2200  // its first dependence.
2201  if (SU->getInstr()->isPHI())
2202  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2203  else
2204  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2205  } else {
2206  int FirstCycle = Schedule.getFirstCycle();
2207  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2208  FirstCycle + getASAP(SU) + II - 1, II);
2209  }
2210  // Even if we find a schedule, make sure the schedule doesn't exceed the
2211  // allowable number of stages. We keep trying if this happens.
2212  if (scheduleFound)
2213  if (SwpMaxStages > -1 &&
2214  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2215  scheduleFound = false;
2216 
2217  DEBUG({
2218  if (!scheduleFound)
2219  dbgs() << "\tCan't schedule\n";
2220  });
2221  } while (++NI != NE && scheduleFound);
2222 
2223  // If a schedule is found, check if it is a valid schedule too.
2224  if (scheduleFound)
2225  scheduleFound = Schedule.isValidSchedule(this);
2226  }
2227 
2228  DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2229 
2230  if (scheduleFound)
2231  Schedule.finalizeSchedule(this);
2232  else
2233  Schedule.reset();
2234 
2235  return scheduleFound && Schedule.getMaxStageCount() > 0;
2236 }
2237 
2238 /// Given a schedule for the loop, generate a new version of the loop,
2239 /// and replace the old version. This function generates a prolog
2240 /// that contains the initial iterations in the pipeline, and kernel
2241 /// loop, and the epilogue that contains the code for the final
2242 /// iterations.
2243 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2244  // Create a new basic block for the kernel and add it to the CFG.
2245  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2246 
2247  unsigned MaxStageCount = Schedule.getMaxStageCount();
2248 
2249  // Remember the registers that are used in different stages. The index is
2250  // the iteration, or stage, that the instruction is scheduled in. This is
2251  // a map between register names in the orignal block and the names created
2252  // in each stage of the pipelined loop.
2253  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2254  InstrMapTy InstrMap;
2255 
2257  // Generate the prolog instructions that set up the pipeline.
2258  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2259  MF.insert(BB->getIterator(), KernelBB);
2260 
2261  // Rearrange the instructions to generate the new, pipelined loop,
2262  // and update register names as needed.
2263  for (int Cycle = Schedule.getFirstCycle(),
2264  LastCycle = Schedule.getFinalCycle();
2265  Cycle <= LastCycle; ++Cycle) {
2266  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2267  // This inner loop schedules each instruction in the cycle.
2268  for (SUnit *CI : CycleInstrs) {
2269  if (CI->getInstr()->isPHI())
2270  continue;
2271  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2272  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2273  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2274  KernelBB->push_back(NewMI);
2275  InstrMap[NewMI] = CI->getInstr();
2276  }
2277  }
2278 
2279  // Copy any terminator instructions to the new kernel, and update
2280  // names as needed.
2281  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2282  E = BB->instr_end();
2283  I != E; ++I) {
2284  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2285  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2286  KernelBB->push_back(NewMI);
2287  InstrMap[NewMI] = &*I;
2288  }
2289 
2290  KernelBB->transferSuccessors(BB);
2291  KernelBB->replaceSuccessor(BB, KernelBB);
2292 
2293  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2294  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2295  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2296  InstrMap, MaxStageCount, MaxStageCount, false);
2297 
2298  DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2299 
2301  // Generate the epilog instructions to complete the pipeline.
2302  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2303  PrologBBs);
2304 
2305  // We need this step because the register allocation doesn't handle some
2306  // situations well, so we insert copies to help out.
2307  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2308 
2309  // Remove dead instructions due to loop induction variables.
2310  removeDeadInstructions(KernelBB, EpilogBBs);
2311 
2312  // Add branches between prolog and epilog blocks.
2313  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2314 
2315  // Remove the original loop since it's no longer referenced.
2316  BB->clear();
2317  BB->eraseFromParent();
2318 
2319  delete[] VRMap;
2320 }
2321 
2322 /// Generate the pipeline prolog code.
2323 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2324  MachineBasicBlock *KernelBB,
2325  ValueMapTy *VRMap,
2326  MBBVectorTy &PrologBBs) {
2327  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2328  assert(PreheaderBB != nullptr &&
2329  "Need to add code to handle loops w/o preheader");
2330  MachineBasicBlock *PredBB = PreheaderBB;
2331  InstrMapTy InstrMap;
2332 
2333  // Generate a basic block for each stage, not including the last stage,
2334  // which will be generated in the kernel. Each basic block may contain
2335  // instructions from multiple stages/iterations.
2336  for (unsigned i = 0; i < LastStage; ++i) {
2337  // Create and insert the prolog basic block prior to the original loop
2338  // basic block. The original loop is removed later.
2339  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2340  PrologBBs.push_back(NewBB);
2341  MF.insert(BB->getIterator(), NewBB);
2342  NewBB->transferSuccessors(PredBB);
2343  PredBB->addSuccessor(NewBB);
2344  PredBB = NewBB;
2345 
2346  // Generate instructions for each appropriate stage. Process instructions
2347  // in original program order.
2348  for (int StageNum = i; StageNum >= 0; --StageNum) {
2349  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2350  BBE = BB->getFirstTerminator();
2351  BBI != BBE; ++BBI) {
2352  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2353  if (BBI->isPHI())
2354  continue;
2355  MachineInstr *NewMI =
2356  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2357  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2358  VRMap);
2359  NewBB->push_back(NewMI);
2360  InstrMap[NewMI] = &*BBI;
2361  }
2362  }
2363  }
2364  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2365  DEBUG({
2366  dbgs() << "prolog:\n";
2367  NewBB->dump();
2368  });
2369  }
2370 
2371  PredBB->replaceSuccessor(BB, KernelBB);
2372 
2373  // Check if we need to remove the branch from the preheader to the original
2374  // loop, and replace it with a branch to the new loop.
2375  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2376  if (numBranches) {
2378  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2379  }
2380 }
2381 
2382 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2383 /// that were started in either the prolog or the kernel. We create a basic
2384 /// block for each stage that needs to complete.
2385 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2386  MachineBasicBlock *KernelBB,
2387  ValueMapTy *VRMap,
2388  MBBVectorTy &EpilogBBs,
2389  MBBVectorTy &PrologBBs) {
2390  // We need to change the branch from the kernel to the first epilog block, so
2391  // this call to analyze branch uses the kernel rather than the original BB.
2392  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2394  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2395  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2396  if (checkBranch)
2397  return;
2398 
2399  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2400  if (*LoopExitI == KernelBB)
2401  ++LoopExitI;
2402  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2403  MachineBasicBlock *LoopExitBB = *LoopExitI;
2404 
2405  MachineBasicBlock *PredBB = KernelBB;
2406  MachineBasicBlock *EpilogStart = LoopExitBB;
2407  InstrMapTy InstrMap;
2408 
2409  // Generate a basic block for each stage, not including the last stage,
2410  // which was generated for the kernel. Each basic block may contain
2411  // instructions from multiple stages/iterations.
2412  int EpilogStage = LastStage + 1;
2413  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2415  EpilogBBs.push_back(NewBB);
2416  MF.insert(BB->getIterator(), NewBB);
2417 
2418  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2419  NewBB->addSuccessor(LoopExitBB);
2420 
2421  if (EpilogStart == LoopExitBB)
2422  EpilogStart = NewBB;
2423 
2424  // Add instructions to the epilog depending on the current block.
2425  // Process instructions in original program order.
2426  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2427  for (auto &BBI : *BB) {
2428  if (BBI.isPHI())
2429  continue;
2430  MachineInstr *In = &BBI;
2431  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2432  MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
2433  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2434  NewBB->push_back(NewMI);
2435  InstrMap[NewMI] = In;
2436  }
2437  }
2438  }
2439  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2440  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2441  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2442  InstrMap, LastStage, EpilogStage, i == 1);
2443  PredBB = NewBB;
2444 
2445  DEBUG({
2446  dbgs() << "epilog:\n";
2447  NewBB->dump();
2448  });
2449  }
2450 
2451  // Fix any Phi nodes in the loop exit block.
2452  for (MachineInstr &MI : *LoopExitBB) {
2453  if (!MI.isPHI())
2454  break;
2455  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2456  MachineOperand &MO = MI.getOperand(i);
2457  if (MO.getMBB() == BB)
2458  MO.setMBB(PredBB);
2459  }
2460  }
2461 
2462  // Create a branch to the new epilog from the kernel.
2463  // Remove the original branch and add a new branch to the epilog.
2464  TII->removeBranch(*KernelBB);
2465  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2466  // Add a branch to the loop exit.
2467  if (EpilogBBs.size() > 0) {
2468  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2470  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2471  }
2472 }
2473 
2474 /// Replace all uses of FromReg that appear outside the specified
2475 /// basic block with ToReg.
2476 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2477  MachineBasicBlock *MBB,
2478  MachineRegisterInfo &MRI,
2479  LiveIntervals &LIS) {
2480  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2481  E = MRI.use_end();
2482  I != E;) {
2483  MachineOperand &O = *I;
2484  ++I;
2485  if (O.getParent()->getParent() != MBB)
2486  O.setReg(ToReg);
2487  }
2488  if (!LIS.hasInterval(ToReg))
2489  LIS.createEmptyInterval(ToReg);
2490 }
2491 
2492 /// Return true if the register has a use that occurs outside the
2493 /// specified loop.
2494 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2495  MachineRegisterInfo &MRI) {
2497  E = MRI.use_end();
2498  I != E; ++I)
2499  if (I->getParent()->getParent() != BB)
2500  return true;
2501  return false;
2502 }
2503 
2504 /// Generate Phis for the specific block in the generated pipelined code.
2505 /// This function looks at the Phis from the original code to guide the
2506 /// creation of new Phis.
2507 void SwingSchedulerDAG::generateExistingPhis(
2509  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2510  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2511  bool IsLast) {
2512  // Compute the stage number for the initial value of the Phi, which
2513  // comes from the prolog. The prolog to use depends on to which kernel/
2514  // epilog that we're adding the Phi.
2515  unsigned PrologStage = 0;
2516  unsigned PrevStage = 0;
2517  bool InKernel = (LastStageNum == CurStageNum);
2518  if (InKernel) {
2519  PrologStage = LastStageNum - 1;
2520  PrevStage = CurStageNum;
2521  } else {
2522  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2523  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2524  }
2525 
2526  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2527  BBE = BB->getFirstNonPHI();
2528  BBI != BBE; ++BBI) {
2529  unsigned Def = BBI->getOperand(0).getReg();
2530 
2531  unsigned InitVal = 0;
2532  unsigned LoopVal = 0;
2533  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2534 
2535  unsigned PhiOp1 = 0;
2536  // The Phi value from the loop body typically is defined in the loop, but
2537  // not always. So, we need to check if the value is defined in the loop.
2538  unsigned PhiOp2 = LoopVal;
2539  if (VRMap[LastStageNum].count(LoopVal))
2540  PhiOp2 = VRMap[LastStageNum][LoopVal];
2541 
2542  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2543  int LoopValStage =
2544  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2545  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2546  if (NumStages == 0) {
2547  // We don't need to generate a Phi anymore, but we need to rename any uses
2548  // of the Phi value.
2549  unsigned NewReg = VRMap[PrevStage][LoopVal];
2550  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2551  Def, NewReg);
2552  if (VRMap[CurStageNum].count(LoopVal))
2553  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2554  }
2555  // Adjust the number of Phis needed depending on the number of prologs left,
2556  // and the distance from where the Phi is first scheduled.
2557  unsigned NumPhis = NumStages;
2558  if (!InKernel && (int)PrologStage < LoopValStage)
2559  // The NumPhis is the maximum number of new Phis needed during the steady
2560  // state. If the Phi has not been scheduled in current prolog, then we
2561  // need to generate less Phis.
2562  NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
2563  // The number of Phis cannot exceed the number of prolog stages. Each
2564  // stage can potentially define two values.
2565  NumPhis = std::min(NumPhis, PrologStage + 2);
2566 
2567  unsigned NewReg = 0;
2568 
2569  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2570  // In the epilog, we may need to look back one stage to get the correct
2571  // Phi name because the epilog and prolog blocks execute the same stage.
2572  // The correct name is from the previous block only when the Phi has
2573  // been completely scheduled prior to the epilog, and Phi value is not
2574  // needed in multiple stages.
2575  int StageDiff = 0;
2576  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2577  NumPhis == 1)
2578  StageDiff = 1;
2579  // Adjust the computations below when the phi and the loop definition
2580  // are scheduled in different stages.
2581  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2582  StageDiff = StageScheduled - LoopValStage;
2583  for (unsigned np = 0; np < NumPhis; ++np) {
2584  // If the Phi hasn't been scheduled, then use the initial Phi operand
2585  // value. Otherwise, use the scheduled version of the instruction. This
2586  // is a little complicated when a Phi references another Phi.
2587  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2588  PhiOp1 = InitVal;
2589  // Check if the Phi has already been scheduled in a prolog stage.
2590  else if (PrologStage >= AccessStage + StageDiff + np &&
2591  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2592  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2593  // Check if the Phi has already been scheduled, but the loop intruction
2594  // is either another Phi, or doesn't occur in the loop.
2595  else if (PrologStage >= AccessStage + StageDiff + np) {
2596  // If the Phi references another Phi, we need to examine the other
2597  // Phi to get the correct value.
2598  PhiOp1 = LoopVal;
2599  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2600  int Indirects = 1;
2601  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2602  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2603  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2604  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2605  else
2606  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2607  InstOp1 = MRI.getVRegDef(PhiOp1);
2608  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2609  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2610  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2611  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2612  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2613  break;
2614  }
2615  ++Indirects;
2616  }
2617  } else
2618  PhiOp1 = InitVal;
2619  // If this references a generated Phi in the kernel, get the Phi operand
2620  // from the incoming block.
2621  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2622  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2623  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2624 
2625  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2626  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2627  // In the epilog, a map lookup is needed to get the value from the kernel,
2628  // or previous epilog block. How is does this depends on if the
2629  // instruction is scheduled in the previous block.
2630  if (!InKernel) {
2631  int StageDiffAdj = 0;
2632  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2633  StageDiffAdj = StageScheduled - LoopValStage;
2634  // Use the loop value defined in the kernel, unless the kernel
2635  // contains the last definition of the Phi.
2636  if (np == 0 && PrevStage == LastStageNum &&
2637  (StageScheduled != 0 || LoopValStage != 0) &&
2638  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2639  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2640  // Use the value defined by the Phi. We add one because we switch
2641  // from looking at the loop value to the Phi definition.
2642  else if (np > 0 && PrevStage == LastStageNum &&
2643  VRMap[PrevStage - np + 1].count(Def))
2644  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2645  // Use the loop value defined in the kernel.
2646  else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
2647  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2648  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2649  // Use the value defined by the Phi, unless we're generating the first
2650  // epilog and the Phi refers to a Phi in a different stage.
2651  else if (VRMap[PrevStage - np].count(Def) &&
2652  (!LoopDefIsPhi || PrevStage != LastStageNum))
2653  PhiOp2 = VRMap[PrevStage - np][Def];
2654  }
2655 
2656  // Check if we can reuse an existing Phi. This occurs when a Phi
2657  // references another Phi, and the other Phi is scheduled in an
2658  // earlier stage. We can try to reuse an existing Phi up until the last
2659  // stage of the current Phi.
2660  if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
2661  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2662  int StageDiff = (StageScheduled - LoopValStage);
2663  LVNumStages -= StageDiff;
2664  if (LVNumStages > (int)np) {
2665  NewReg = PhiOp2;
2666  unsigned ReuseStage = CurStageNum;
2667  if (Schedule.isLoopCarried(this, *PhiInst))
2668  ReuseStage -= LVNumStages;
2669  // Check if the Phi to reuse has been generated yet. If not, then
2670  // there is nothing to reuse.
2671  if (VRMap[ReuseStage].count(LoopVal)) {
2672  NewReg = VRMap[ReuseStage][LoopVal];
2673 
2674  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2675  &*BBI, Def, NewReg);
2676  // Update the map with the new Phi name.
2677  VRMap[CurStageNum - np][Def] = NewReg;
2678  PhiOp2 = NewReg;
2679  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2680  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2681 
2682  if (IsLast && np == NumPhis - 1)
2683  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2684  continue;
2685  }
2686  } else if (InKernel && StageDiff > 0 &&
2687  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2688  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2689  }
2690 
2691  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2692  NewReg = MRI.createVirtualRegister(RC);
2693 
2694  MachineInstrBuilder NewPhi =
2695  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2696  TII->get(TargetOpcode::PHI), NewReg);
2697  NewPhi.addReg(PhiOp1).addMBB(BB1);
2698  NewPhi.addReg(PhiOp2).addMBB(BB2);
2699  if (np == 0)
2700  InstrMap[NewPhi] = &*BBI;
2701 
2702  // We define the Phis after creating the new pipelined code, so
2703  // we need to rename the Phi values in scheduled instructions.
2704 
2705  unsigned PrevReg = 0;
2706  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2707  PrevReg = VRMap[PrevStage - np][LoopVal];
2708  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2709  Def, NewReg, PrevReg);
2710  // If the Phi has been scheduled, use the new name for rewriting.
2711  if (VRMap[CurStageNum - np].count(Def)) {
2712  unsigned R = VRMap[CurStageNum - np][Def];
2713  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2714  R, NewReg);
2715  }
2716 
2717  // Check if we need to rename any uses that occurs after the loop. The
2718  // register to replace depends on whether the Phi is scheduled in the
2719  // epilog.
2720  if (IsLast && np == NumPhis - 1)
2721  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2722 
2723  // In the kernel, a dependent Phi uses the value from this Phi.
2724  if (InKernel)
2725  PhiOp2 = NewReg;
2726 
2727  // Update the map with the new Phi name.
2728  VRMap[CurStageNum - np][Def] = NewReg;
2729  }
2730 
2731  while (NumPhis++ < NumStages) {
2732  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2733  &*BBI, Def, NewReg, 0);
2734  }
2735 
2736  // Check if we need to rename a Phi that has been eliminated due to
2737  // scheduling.
2738  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2739  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2740  }
2741 }
2742 
2743 /// Generate Phis for the specified block in the generated pipelined code.
2744 /// These are new Phis needed because the definition is scheduled after the
2745 /// use in the pipelened sequence.
2746 void SwingSchedulerDAG::generatePhis(
2748  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2749  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2750  bool IsLast) {
2751  // Compute the stage number that contains the initial Phi value, and
2752  // the Phi from the previous stage.
2753  unsigned PrologStage = 0;
2754  unsigned PrevStage = 0;
2755  unsigned StageDiff = CurStageNum - LastStageNum;
2756  bool InKernel = (StageDiff == 0);
2757  if (InKernel) {
2758  PrologStage = LastStageNum - 1;
2759  PrevStage = CurStageNum;
2760  } else {
2761  PrologStage = LastStageNum - StageDiff;
2762  PrevStage = LastStageNum + StageDiff - 1;
2763  }
2764 
2765  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2766  BBE = BB->instr_end();
2767  BBI != BBE; ++BBI) {
2768  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2769  MachineOperand &MO = BBI->getOperand(i);
2770  if (!MO.isReg() || !MO.isDef() ||
2772  continue;
2773 
2774  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2775  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2776  unsigned Def = MO.getReg();
2777  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2778  // An instruction scheduled in stage 0 and is used after the loop
2779  // requires a phi in the epilog for the last definition from either
2780  // the kernel or prolog.
2781  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2782  hasUseAfterLoop(Def, BB, MRI))
2783  NumPhis = 1;
2784  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2785  continue;
2786 
2787  unsigned PhiOp2 = VRMap[PrevStage][Def];
2788  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2789  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2790  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2791  // The number of Phis can't exceed the number of prolog stages. The
2792  // prolog stage number is zero based.
2793  if (NumPhis > PrologStage + 1 - StageScheduled)
2794  NumPhis = PrologStage + 1 - StageScheduled;
2795  for (unsigned np = 0; np < NumPhis; ++np) {
2796  unsigned PhiOp1 = VRMap[PrologStage][Def];
2797  if (np <= PrologStage)
2798  PhiOp1 = VRMap[PrologStage - np][Def];
2799  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2800  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2801  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2802  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2803  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2804  }
2805  if (!InKernel)
2806  PhiOp2 = VRMap[PrevStage - np][Def];
2807 
2808  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2809  unsigned NewReg = MRI.createVirtualRegister(RC);
2810 
2811  MachineInstrBuilder NewPhi =
2812  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2813  TII->get(TargetOpcode::PHI), NewReg);
2814  NewPhi.addReg(PhiOp1).addMBB(BB1);
2815  NewPhi.addReg(PhiOp2).addMBB(BB2);
2816  if (np == 0)
2817  InstrMap[NewPhi] = &*BBI;
2818 
2819  // Rewrite uses and update the map. The actions depend upon whether
2820  // we generating code for the kernel or epilog blocks.
2821  if (InKernel) {
2822  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2823  &*BBI, PhiOp1, NewReg);
2824  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2825  &*BBI, PhiOp2, NewReg);
2826 
2827  PhiOp2 = NewReg;
2828  VRMap[PrevStage - np - 1][Def] = NewReg;
2829  } else {
2830  VRMap[CurStageNum - np][Def] = NewReg;
2831  if (np == NumPhis - 1)
2832  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2833  &*BBI, Def, NewReg);
2834  }
2835  if (IsLast && np == NumPhis - 1)
2836  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2837  }
2838  }
2839  }
2840 }
2841 
2842 /// Remove instructions that generate values with no uses.
2843 /// Typically, these are induction variable operations that generate values
2844 /// used in the loop itself. A dead instruction has a definition with
2845 /// no uses, or uses that occur in the original loop only.
2846 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2847  MBBVectorTy &EpilogBBs) {
2848  // For each epilog block, check that the value defined by each instruction
2849  // is used. If not, delete it.
2850  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2851  MBE = EpilogBBs.rend();
2852  MBB != MBE; ++MBB)
2853  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2854  ME = (*MBB)->instr_rend();
2855  MI != ME;) {
2856  // From DeadMachineInstructionElem. Don't delete inline assembly.
2857  if (MI->isInlineAsm()) {
2858  ++MI;
2859  continue;
2860  }
2861  bool SawStore = false;
2862  // Check if it's safe to remove the instruction due to side effects.
2863  // We can, and want to, remove Phis here.
2864  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2865  ++MI;
2866  continue;
2867  }
2868  bool used = true;
2869  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2870  MOE = MI->operands_end();
2871  MOI != MOE; ++MOI) {
2872  if (!MOI->isReg() || !MOI->isDef())
2873  continue;
2874  unsigned reg = MOI->getReg();
2875  unsigned realUses = 0;
2876  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2877  EI = MRI.use_end();
2878  UI != EI; ++UI) {
2879  // Check if there are any uses that occur only in the original
2880  // loop. If so, that's not a real use.
2881  if (UI->getParent()->getParent() != BB) {
2882  realUses++;
2883  used = true;
2884  break;
2885  }
2886  }
2887  if (realUses > 0)
2888  break;
2889  used = false;
2890  }
2891  if (!used) {
2892  MI++->eraseFromParent();
2893  continue;
2894  }
2895  ++MI;
2896  }
2897  // In the kernel block, check if we can remove a Phi that generates a value
2898  // used in an instruction removed in the epilog block.
2899  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2900  BBE = KernelBB->getFirstNonPHI();
2901  BBI != BBE;) {
2902  MachineInstr *MI = &*BBI;
2903  ++BBI;
2904  unsigned reg = MI->getOperand(0).getReg();
2905  if (MRI.use_begin(reg) == MRI.use_end()) {
2906  MI->eraseFromParent();
2907  }
2908  }
2909 }
2910 
2911 /// For loop carried definitions, we split the lifetime of a virtual register
2912 /// that has uses past the definition in the next iteration. A copy with a new
2913 /// virtual register is inserted before the definition, which helps with
2914 /// generating a better register assignment.
2915 ///
2916 /// v1 = phi(a, v2) v1 = phi(a, v2)
2917 /// v2 = phi(b, v3) v2 = phi(b, v3)
2918 /// v3 = .. v4 = copy v1
2919 /// .. = V1 v3 = ..
2920 /// .. = v4
2921 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2922  MBBVectorTy &EpilogBBs,
2923  SMSchedule &Schedule) {
2924  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2925  for (auto &PHI : KernelBB->phis()) {
2926  unsigned Def = PHI.getOperand(0).getReg();
2927  // Check for any Phi definition that used as an operand of another Phi
2928  // in the same block.
2930  E = MRI.use_instr_end();
2931  I != E; ++I) {
2932  if (I->isPHI() && I->getParent() == KernelBB) {
2933  // Get the loop carried definition.
2934  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2935  if (!LCDef)
2936  continue;
2937  MachineInstr *MI = MRI.getVRegDef(LCDef);
2938  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2939  continue;
2940  // Search through the rest of the block looking for uses of the Phi
2941  // definition. If one occurs, then split the lifetime.
2942  unsigned SplitReg = 0;
2943  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2944  KernelBB->instr_end()))
2945  if (BBJ.readsRegister(Def)) {
2946  // We split the lifetime when we find the first use.
2947  if (SplitReg == 0) {
2948  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2949  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2950  TII->get(TargetOpcode::COPY), SplitReg)
2951  .addReg(Def);
2952  }
2953  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2954  }
2955  if (!SplitReg)
2956  continue;
2957  // Search through each of the epilog blocks for any uses to be renamed.
2958  for (auto &Epilog : EpilogBBs)
2959  for (auto &I : *Epilog)
2960  if (I.readsRegister(Def))
2961  I.substituteRegister(Def, SplitReg, 0, *TRI);
2962  break;
2963  }
2964  }
2965  }
2966 }
2967 
2968 /// Remove the incoming block from the Phis in a basic block.
2969 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2970  for (MachineInstr &MI : *BB) {
2971  if (!MI.isPHI())
2972  break;
2973  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2974  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2975  MI.RemoveOperand(i + 1);
2976  MI.RemoveOperand(i);
2977  break;
2978  }
2979  }
2980 }
2981 
2982 /// Create branches from each prolog basic block to the appropriate epilog
2983 /// block. These edges are needed if the loop ends before reaching the
2984 /// kernel.
2985 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2986  MachineBasicBlock *KernelBB,
2987  MBBVectorTy &EpilogBBs,
2988  SMSchedule &Schedule, ValueMapTy *VRMap) {
2989  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2990  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2991  MachineInstr *Cmp = Pass.LI.LoopCompare;
2992  MachineBasicBlock *LastPro = KernelBB;
2993  MachineBasicBlock *LastEpi = KernelBB;
2994 
2995  // Start from the blocks connected to the kernel and work "out"
2996  // to the first prolog and the last epilog blocks.
2998  unsigned MaxIter = PrologBBs.size() - 1;
2999  unsigned LC = UINT_MAX;
3000  unsigned LCMin = UINT_MAX;
3001  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3002  // Add branches to the prolog that go to the corresponding
3003  // epilog, and the fall-thru prolog/kernel block.
3004  MachineBasicBlock *Prolog = PrologBBs[j];
3005  MachineBasicBlock *Epilog = EpilogBBs[i];
3006  // We've executed one iteration, so decrement the loop count and check for
3007  // the loop end.
3009  // Check if the LOOP0 has already been removed. If so, then there is no need
3010  // to reduce the trip count.
3011  if (LC != 0)
3012  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3013  MaxIter);
3014 
3015  // Record the value of the first trip count, which is used to determine if
3016  // branches and blocks can be removed for constant trip counts.
3017  if (LCMin == UINT_MAX)
3018  LCMin = LC;
3019 
3020  unsigned numAdded = 0;
3022  Prolog->addSuccessor(Epilog);
3023  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3024  } else if (j >= LCMin) {
3025  Prolog->addSuccessor(Epilog);
3026  Prolog->removeSuccessor(LastPro);
3027  LastEpi->removeSuccessor(Epilog);
3028  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3029  removePhis(Epilog, LastEpi);
3030  // Remove the blocks that are no longer referenced.
3031  if (LastPro != LastEpi) {
3032  LastEpi->clear();
3033  LastEpi->eraseFromParent();
3034  }
3035  LastPro->clear();
3036  LastPro->eraseFromParent();
3037  } else {
3038  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3039  removePhis(Epilog, Prolog);
3040  }
3041  LastPro = Prolog;
3042  LastEpi = Epilog;
3044  E = Prolog->instr_rend();
3045  I != E && numAdded > 0; ++I, --numAdded)
3046  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3047  }
3048 }
3049 
3050 /// Return true if we can compute the amount the instruction changes
3051 /// during each iteration. Set Delta to the amount of the change.
3052 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3053  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3054  unsigned BaseReg;
3055  int64_t Offset;
3056  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
3057  return false;
3058 
3059  MachineRegisterInfo &MRI = MF.getRegInfo();
3060  // Check if there is a Phi. If so, get the definition in the loop.
3061  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3062  if (BaseDef && BaseDef->isPHI()) {
3063  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3064  BaseDef = MRI.getVRegDef(BaseReg);
3065  }
3066  if (!BaseDef)
3067  return false;
3068 
3069  int D = 0;
3070  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3071  return false;
3072 
3073  Delta = D;
3074  return true;
3075 }
3076 
3077 /// Update the memory operand with a new offset when the pipeliner
3078 /// generates a new copy of the instruction that refers to a
3079 /// different memory location.
3080 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3081  MachineInstr &OldMI, unsigned Num) {
3082  if (Num == 0)
3083  return;
3084  // If the instruction has memory operands, then adjust the offset
3085  // when the instruction appears in different stages.
3086  unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
3087  if (NumRefs == 0)
3088  return;
3089  MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
3090  unsigned Refs = 0;
3091  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3092  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3093  (!MMO->getValue())) {
3094  NewMemRefs[Refs++] = MMO;
3095  continue;
3096  }
3097  unsigned Delta;
3098  if (computeDelta(OldMI, Delta)) {
3099  int64_t AdjOffset = Delta * Num;
3100  NewMemRefs[Refs++] =
3101  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
3102  } else
3103  NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
3104  }
3105  NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
3106 }
3107 
3108 /// Clone the instruction for the new pipelined loop and update the
3109 /// memory operands, if needed.
3110 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3111  unsigned CurStageNum,
3112  unsigned InstStageNum) {
3113  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3114  // Check for tied operands in inline asm instructions. This should be handled
3115  // elsewhere, but I'm not sure of the best solution.
3116  if (OldMI->isInlineAsm())
3117  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3118  const auto &MO = OldMI->getOperand(i);
3119  if (MO.isReg() && MO.isUse())
3120  break;
3121  unsigned UseIdx;
3122  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3123  NewMI->tieOperands(i, UseIdx);
3124  }
3125  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3126  return NewMI;
3127 }
3128 
3129 /// Clone the instruction for the new pipelined loop. If needed, this
3130 /// function updates the instruction using the values saved in the
3131 /// InstrChanges structure.
3132 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3133  unsigned CurStageNum,
3134  unsigned InstStageNum,
3135  SMSchedule &Schedule) {
3136  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3138  InstrChanges.find(getSUnit(OldMI));
3139  if (It != InstrChanges.end()) {
3140  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3141  unsigned BasePos, OffsetPos;
3142  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3143  return nullptr;
3144  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3145  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
3146  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3147  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3148  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3149  }
3150  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3151  return NewMI;
3152 }
3153 
3154 /// Update the machine instruction with new virtual registers. This
3155 /// function may change the defintions and/or uses.
3156 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3157  unsigned CurStageNum,
3158  unsigned InstrStageNum,
3159  SMSchedule &Schedule,
3160  ValueMapTy *VRMap) {
3161  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3162  MachineOperand &MO = NewMI->getOperand(i);
3164  continue;
3165  unsigned reg = MO.getReg();
3166  if (MO.isDef()) {
3167  // Create a new virtual register for the definition.
3168  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3169  unsigned NewReg = MRI.createVirtualRegister(RC);
3170  MO.setReg(NewReg);
3171  VRMap[CurStageNum][reg] = NewReg;
3172  if (LastDef)
3173  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3174  } else if (MO.isUse()) {
3175  MachineInstr *Def = MRI.getVRegDef(reg);
3176  // Compute the stage that contains the last definition for instruction.
3177  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3178  unsigned StageNum = CurStageNum;
3179  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3180  // Compute the difference in stages between the defintion and the use.
3181  unsigned StageDiff = (InstrStageNum - DefStageNum);
3182  // Make an adjustment to get the last definition.
3183  StageNum -= StageDiff;
3184  }
3185  if (VRMap[StageNum].count(reg))
3186  MO.setReg(VRMap[StageNum][reg]);
3187  }
3188  }
3189 }
3190 
3191 /// Return the instruction in the loop that defines the register.
3192 /// If the definition is a Phi, then follow the Phi operand to
3193 /// the instruction in the loop.
3194 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3196  MachineInstr *Def = MRI.getVRegDef(Reg);
3197  while (Def->isPHI()) {
3198  if (!Visited.insert(Def).second)
3199  break;
3200  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3201  if (Def->getOperand(i + 1).getMBB() == BB) {
3202  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3203  break;
3204  }
3205  }
3206  return Def;
3207 }
3208 
3209 /// Return the new name for the value from the previous stage.
3210 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3211  unsigned LoopVal, unsigned LoopStage,
3212  ValueMapTy *VRMap,
3213  MachineBasicBlock *BB) {
3214  unsigned PrevVal = 0;
3215  if (StageNum > PhiStage) {
3216  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3217  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3218  // The name is defined in the previous stage.
3219  PrevVal = VRMap[StageNum - 1][LoopVal];
3220  else if (VRMap[StageNum].count(LoopVal))
3221  // The previous name is defined in the current stage when the instruction
3222  // order is swapped.
3223  PrevVal = VRMap[StageNum][LoopVal];
3224  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3225  // The loop value hasn't yet been scheduled.
3226  PrevVal = LoopVal;
3227  else if (StageNum == PhiStage + 1)
3228  // The loop value is another phi, which has not been scheduled.
3229  PrevVal = getInitPhiReg(*LoopInst, BB);
3230  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3231  // The loop value is another phi, which has been scheduled.
3232  PrevVal =
3233  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3234  LoopStage, VRMap, BB);
3235  }
3236  return PrevVal;
3237 }
3238 
3239 /// Rewrite the Phi values in the specified block to use the mappings
3240 /// from the initial operand. Once the Phi is scheduled, we switch
3241 /// to using the loop value instead of the Phi value, so those names
3242 /// do not need to be rewritten.
3243 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3244  unsigned StageNum,
3245  SMSchedule &Schedule,
3246  ValueMapTy *VRMap,
3247  InstrMapTy &InstrMap) {
3248  for (auto &PHI : BB->phis()) {
3249  unsigned InitVal = 0;
3250  unsigned LoopVal = 0;
3251  getPhiRegs(PHI, BB, InitVal, LoopVal);
3252  unsigned PhiDef = PHI.getOperand(0).getReg();
3253 
3254  unsigned PhiStage =
3255  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3256  unsigned LoopStage =
3257  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3258  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3259  if (NumPhis > StageNum)
3260  NumPhis = StageNum;
3261  for (unsigned np = 0; np <= NumPhis; ++np) {
3262  unsigned NewVal =
3263  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3264  if (!NewVal)
3265  NewVal = InitVal;
3266  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3267  PhiDef, NewVal);
3268  }
3269  }
3270 }
3271 
3272 /// Rewrite a previously scheduled instruction to use the register value
3273 /// from the new instruction. Make sure the instruction occurs in the
3274 /// basic block, and we don't change the uses in the new instruction.
3275 void SwingSchedulerDAG::rewriteScheduledInstr(
3276  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3277  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3278  unsigned NewReg, unsigned PrevReg) {
3279  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3280  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3281  // Rewrite uses that have been scheduled already to use the new
3282  // Phi register.
3283  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3284  EI = MRI.use_end();
3285  UI != EI;) {
3286  MachineOperand &UseOp = *UI;
3287  MachineInstr *UseMI = UseOp.getParent();
3288  ++UI;
3289  if (UseMI->getParent() != BB)
3290  continue;
3291  if (UseMI->isPHI()) {
3292  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3293  continue;
3294  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3295  continue;
3296  }
3297  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3298  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3299  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3300  int StageSched = Schedule.stageScheduled(OrigMISU);
3301  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3302  unsigned ReplaceReg = 0;
3303  // This is the stage for the scheduled instruction.
3304  if (StagePhi == StageSched && Phi->isPHI()) {
3305  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3306  if (PrevReg && InProlog)
3307  ReplaceReg = PrevReg;
3308  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3309  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3310  ReplaceReg = PrevReg;
3311  else
3312  ReplaceReg = NewReg;
3313  }
3314  // The scheduled instruction occurs before the scheduled Phi, and the
3315  // Phi is not loop carried.
3316  if (!InProlog && StagePhi + 1 == StageSched &&
3317  !Schedule.isLoopCarried(this, *Phi))
3318  ReplaceReg = NewReg;
3319  if (StagePhi > StageSched && Phi->isPHI())
3320  ReplaceReg = NewReg;
3321  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3322  ReplaceReg = NewReg;
3323  if (ReplaceReg) {
3324  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3325  UseOp.setReg(ReplaceReg);
3326  }
3327  }
3328 }
3329 
3330 /// Check if we can change the instruction to use an offset value from the
3331 /// previous iteration. If so, return true and set the base and offset values
3332 /// so that we can rewrite the load, if necessary.
3333 /// v1 = Phi(v0, v3)
3334 /// v2 = load v1, 0
3335 /// v3 = post_store v1, 4, x
3336 /// This function enables the load to be rewritten as v2 = load v3, 4.
3337 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3338  unsigned &BasePos,
3339  unsigned &OffsetPos,
3340  unsigned &NewBase,
3341  int64_t &Offset) {
3342  // Get the load instruction.
3343  if (TII->isPostIncrement(*MI))
3344  return false;
3345  unsigned BasePosLd, OffsetPosLd;
3346  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3347  return false;
3348  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3349 
3350  // Look for the Phi instruction.
3351  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3352  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3353  if (!Phi || !Phi->isPHI())
3354  return false;
3355  // Get the register defined in the loop block.
3356  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3357  if (!PrevReg)
3358  return false;
3359 
3360  // Check for the post-increment load/store instruction.
3361  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3362  if (!PrevDef || PrevDef == MI)
3363  return false;
3364 
3365  if (!TII->isPostIncrement(*PrevDef))
3366  return false;
3367 
3368  unsigned BasePos1 = 0, OffsetPos1 = 0;
3369  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3370  return false;
3371 
3372  // Make sure offset values are both positive or both negative.
3373  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3374  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3375  if ((LoadOffset >= 0) != (StoreOffset >= 0))
3376  return false;
3377 
3378  // Set the return value once we determine that we return true.
3379  BasePos = BasePosLd;
3380  OffsetPos = OffsetPosLd;
3381  NewBase = PrevReg;
3382  Offset = StoreOffset;
3383  return true;
3384 }
3385 
3386 /// Apply changes to the instruction if needed. The changes are need
3387 /// to improve the scheduling and depend up on the final schedule.
3388 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3389  SMSchedule &Schedule) {
3390  SUnit *SU = getSUnit(MI);
3392  InstrChanges.find(SU);
3393  if (It != InstrChanges.end()) {
3394  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3395  unsigned BasePos, OffsetPos;
3396  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3397  return;
3398  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3399  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3400  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3401  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3402  int BaseStageNum = Schedule.stageScheduled(SU);
3403  int BaseCycleNum = Schedule.cycleScheduled(SU);
3404  if (BaseStageNum < DefStageNum) {
3405  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3406  int OffsetDiff = DefStageNum - BaseStageNum;
3407  if (DefCycleNum < BaseCycleNum) {
3408  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3409  if (OffsetDiff > 0)
3410  --OffsetDiff;
3411  }
3412  int64_t NewOffset =
3413  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3414  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3415  SU->setInstr(NewMI);
3416  MISUnitMap[NewMI] = SU;
3417  NewMIs.insert(NewMI);
3418  }
3419  }
3420 }
3421 
3422 /// Return true for an order dependence that is loop carried potentially.
3423 /// An order dependence is loop carried if the destination defines a value
3424 /// that may be used by the source in a subsequent iteration.
3425 bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
3426  bool isSucc) {
3427  if (!isOrder(Source, Dep) || Dep.isArtificial())
3428  return false;
3429 
3430  if (!SwpPruneLoopCarried)
3431  return true;
3432 
3433  MachineInstr *SI = Source->getInstr();
3434  MachineInstr *DI = Dep.getSUnit()->getInstr();
3435  if (!isSucc)
3436  std::swap(SI, DI);
3437  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3438 
3439  // Assume ordered loads and stores may have a loop carried dependence.
3440  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3442  return true;
3443 
3444  // Only chain dependences between a load and store can be loop carried.
3445  if (!DI->mayStore() || !SI->mayLoad())
3446  return false;
3447 
3448  unsigned DeltaS, DeltaD;
3449  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3450  return true;
3451 
3452  unsigned BaseRegS, BaseRegD;
3453  int64_t OffsetS, OffsetD;
3454  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3455  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
3456  !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
3457  return true;
3458 
3459  if (BaseRegS != BaseRegD)
3460  return true;
3461 
3462  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3463  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3464 
3465  // This is the main test, which checks the offset values and the loop
3466  // increment value to determine if the accesses may be loop carried.
3467  if (OffsetS >= OffsetD)
3468  return OffsetS + AccessSizeS > DeltaS;
3469  else
3470  return OffsetD + AccessSizeD > DeltaD;
3471 
3472  return true;
3473 }
3474 
3475 void SwingSchedulerDAG::postprocessDAG() {
3476  for (auto &M : Mutations)
3477  M->apply(this);
3478 }
3479 
3480 /// Try to schedule the node at the specified StartCycle and continue
3481 /// until the node is schedule or the EndCycle is reached. This function
3482 /// returns true if the node is scheduled. This routine may search either
3483 /// forward or backward for a place to insert the instruction based upon
3484 /// the relative values of StartCycle and EndCycle.
3485 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3486  bool forward = true;
3487  if (StartCycle > EndCycle)
3488  forward = false;
3489 
3490  // The terminating condition depends on the direction.
3491  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3492  for (int curCycle = StartCycle; curCycle != termCycle;
3493  forward ? ++curCycle : --curCycle) {
3494 
3495  // Add the already scheduled instructions at the specified cycle to the DFA.
3496  Resources->clearResources();
3497  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3498  checkCycle <= LastCycle; checkCycle += II) {
3499  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3500 
3501  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3502  E = cycleInstrs.end();
3503  I != E; ++I) {
3504  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3505  continue;
3506  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3507  "These instructions have already been scheduled.");
3508  Resources->reserveResources(*(*I)->getInstr());
3509  }
3510  }
3511  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3512  Resources->canReserveResources(*SU->getInstr())) {
3513  DEBUG({
3514  dbgs() << "\tinsert at cycle " << curCycle << " ";
3515  SU->getInstr()->dump();
3516  });
3517 
3518  ScheduledInstrs[curCycle].push_back(SU);
3519  InstrToCycle.insert(std::make_pair(SU, curCycle));
3520  if (curCycle > LastCycle)
3521  LastCycle = curCycle;
3522  if (curCycle < FirstCycle)
3523  FirstCycle = curCycle;
3524  return true;
3525  }
3526  DEBUG({
3527  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3528  SU->getInstr()->dump();
3529  });
3530  }
3531  return false;
3532 }
3533 
3534 // Return the cycle of the earliest scheduled instruction in the chain.
3535 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3536  SmallPtrSet<SUnit *, 8> Visited;
3537  SmallVector<SDep, 8> Worklist;
3538  Worklist.push_back(Dep);
3539  int EarlyCycle = INT_MAX;
3540  while (!Worklist.empty()) {
3541  const SDep &Cur = Worklist.pop_back_val();
3542  SUnit *PrevSU = Cur.getSUnit();
3543  if (Visited.count(PrevSU))
3544  continue;
3545  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3546  if (it == InstrToCycle.end())
3547  continue;
3548  EarlyCycle = std::min(EarlyCycle, it->second);
3549  for (const auto &PI : PrevSU->Preds)
3550  if (SwingSchedulerDAG::isOrder(PrevSU, PI))
3551  Worklist.push_back(PI);
3552  Visited.insert(PrevSU);
3553  }
3554  return EarlyCycle;
3555 }
3556 
3557 // Return the cycle of the latest scheduled instruction in the chain.
3558 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3559  SmallPtrSet<SUnit *, 8> Visited;
3560  SmallVector<SDep, 8> Worklist;
3561  Worklist.push_back(Dep);
3562  int LateCycle = INT_MIN;
3563  while (!Worklist.empty()) {
3564  const SDep &Cur = Worklist.pop_back_val();
3565  SUnit *SuccSU = Cur.getSUnit();
3566  if (Visited.count(SuccSU))
3567  continue;
3568  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3569  if (it == InstrToCycle.end())
3570  continue;
3571  LateCycle = std::max(LateCycle, it->second);
3572  for (const auto &SI : SuccSU->Succs)
3573  if (SwingSchedulerDAG::isOrder(SuccSU, SI))
3574  Worklist.push_back(SI);
3575  Visited.insert(SuccSU);
3576  }
3577  return LateCycle;
3578 }
3579 
3580 /// If an instruction has a use that spans multiple iterations, then
3581 /// return true. These instructions are characterized by having a back-ege
3582 /// to a Phi, which contains a reference to another Phi.
3583 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3584  for (auto &P : SU->Preds)
3585  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3586  for (auto &S : P.getSUnit()->Succs)
3587  if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
3588  return P.getSUnit();
3589  return nullptr;
3590 }
3591 
3592 /// Compute the scheduling start slot for the instruction. The start slot
3593 /// depends on any predecessor or successor nodes scheduled already.
3594 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3595  int *MinEnd, int *MaxStart, int II,
3596  SwingSchedulerDAG *DAG) {
3597  // Iterate over each instruction that has been scheduled already. The start
3598  // slot computuation depends on whether the previously scheduled instruction
3599  // is a predecessor or successor of the specified instruction.
3600  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3601 
3602  // Iterate over each instruction in the current cycle.
3603  for (SUnit *I : getInstructions(cycle)) {
3604  // Because we're processing a DAG for the dependences, we recognize
3605  // the back-edge in recurrences by anti dependences.
3606  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3607  const SDep &Dep = SU->Preds[i];
3608  if (Dep.getSUnit() == I) {
3609  if (!DAG->isBackedge(SU, Dep)) {
3610  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3611  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3612  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3613  if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
3614  int End = earliestCycleInChain(Dep) + (II - 1);
3615  *MinEnd = std::min(*MinEnd, End);
3616  }
3617  } else {
3618  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3619  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3620  *MinLateStart = std::min(*MinLateStart, LateStart);
3621  }
3622  }
3623  // For instruction that requires multiple iterations, make sure that
3624  // the dependent instruction is not scheduled past the definition.
3625  SUnit *BE = multipleIterations(I, DAG);
3626  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3627  !SU->isPred(I))
3628  *MinLateStart = std::min(*MinLateStart, cycle);
3629  }
3630  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
3631  if (SU->Succs[i].getSUnit() == I) {
3632  const SDep &Dep = SU->Succs[i];
3633  if (!DAG->isBackedge(SU, Dep)) {
3634  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3635  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3636  *MinLateStart = std::min(*MinLateStart, LateStart);
3637  if (DAG->isLoopCarriedOrder(SU, Dep)) {
3638  int Start = latestCycleInChain(Dep) + 1 - II;
3639  *MaxStart = std::max(*MaxStart, Start);
3640  }
3641  } else {
3642  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3643  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3644  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3645  }
3646  }
3647  }
3648  }
3649 }
3650 
3651 /// Order the instructions within a cycle so that the definitions occur
3652 /// before the uses. Returns true if the instruction is added to the start
3653 /// of the list, or false if added to the end.
3654 bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3655  std::deque<SUnit *> &Insts) {
3656  MachineInstr *MI = SU->getInstr();
3657  bool OrderBeforeUse = false;
3658  bool OrderAfterDef = false;
3659  bool OrderBeforeDef = false;
3660  unsigned MoveDef = 0;
3661  unsigned MoveUse = 0;
3662  int StageInst1 = stageScheduled(SU);
3663 
3664  unsigned Pos = 0;
3665  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3666  ++I, ++Pos) {
3667  // Relative order of Phis does not matter.
3668  if (MI->isPHI() && (*I)->getInstr()->isPHI())
3669  continue;
3670  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3671  MachineOperand &MO = MI->getOperand(i);
3673  continue;
3674  unsigned Reg = MO.getReg();
3675  unsigned BasePos, OffsetPos;
3676  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3677  if (MI->getOperand(BasePos).getReg() == Reg)
3678  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3679  Reg = NewReg;
3680  bool Reads, Writes;
3681  std::tie(Reads, Writes) =
3682  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3683  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3684  OrderBeforeUse = true;
3685  MoveUse = Pos;
3686  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3687  // Add the instruction after the scheduled instruction.
3688  OrderAfterDef = true;
3689  MoveDef = Pos;
3690  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3691  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3692  OrderBeforeUse = true;
3693  MoveUse = Pos;
3694  } else {
3695  OrderAfterDef = true;
3696  MoveDef = Pos;
3697  }
3698  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3699  OrderBeforeUse = true;
3700  MoveUse = Pos;
3701  if (MoveUse != 0) {
3702  OrderAfterDef = true;
3703  MoveDef = Pos - 1;
3704  }
3705  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3706  // Add the instruction before the scheduled instruction.
3707  OrderBeforeUse = true;
3708  MoveUse = Pos;
3709  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3710  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3711  OrderBeforeDef = true;
3712  MoveUse = Pos;
3713  }
3714  }
3715  // Check for order dependences between instructions. Make sure the source
3716  // is ordered before the destination.
3717  for (auto &S : SU->Succs)
3718  if (S.getKind() == SDep::Order) {
3719  if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3720  OrderBeforeUse = true;
3721  MoveUse = Pos;
3722  }
3723  } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
3724  if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
3725  if (S.isAssignedRegDep()) {
3726  OrderAfterDef = true;
3727  MoveDef = Pos;
3728  }
3729  } else {
3730  OrderBeforeUse = true;
3731  MoveUse = Pos;
3732  }
3733  }
3734  for (auto &P : SU->Preds)
3735  if (P.getKind() == SDep::Order) {
3736  if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3737  OrderAfterDef = true;
3738  MoveDef = Pos;
3739  }
3740  } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
3741  if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
3742  if (P.isAssignedRegDep()) {
3743  OrderBeforeUse = true;
3744  MoveUse = Pos;
3745  }
3746  } else {
3747  OrderAfterDef = true;
3748  MoveDef = Pos;
3749  }
3750  }
3751  }
3752 
3753  // A circular dependence.
3754  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3755  OrderBeforeUse = false;
3756 
3757  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3758  // to a loop-carried dependence.
3759  if (OrderBeforeDef)
3760  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3761 
3762  // The uncommon case when the instruction order needs to be updated because
3763  // there is both a use and def.
3764  if (OrderBeforeUse && OrderAfterDef) {
3765  SUnit *UseSU = Insts.at(MoveUse);
3766  SUnit *DefSU = Insts.at(MoveDef);
3767  if (MoveUse > MoveDef) {
3768  Insts.erase(Insts.begin() + MoveUse);
3769  Insts.erase(Insts.begin() + MoveDef);
3770  } else {
3771  Insts.erase(Insts.begin() + MoveDef);
3772  Insts.erase(Insts.begin() + MoveUse);
3773  }
3774  if (orderDependence(SSD, UseSU, Insts)) {
3775  Insts.push_front(SU);
3776  orderDependence(SSD, DefSU, Insts);
3777  return true;
3778  }
3779  Insts.pop_back();
3780  Insts.push_back(SU);
3781  Insts.push_back(UseSU);
3782  orderDependence(SSD, DefSU, Insts);
3783  return false;
3784  }
3785  // Put the new instruction first if there is a use in the list. Otherwise,
3786  // put it at the end of the list.
3787  if (OrderBeforeUse)
3788  Insts.push_front(SU);
3789  else
3790  Insts.push_back(SU);
3791  return OrderBeforeUse;
3792 }
3793 
3794 /// Return true if the scheduled Phi has a loop carried operand.
3795 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3796  if (!Phi.isPHI())
3797  return false;
3798  assert(Phi.isPHI() && "Expecing a Phi.");
3799  SUnit *DefSU = SSD->getSUnit(&Phi);
3800  unsigned DefCycle = cycleScheduled(DefSU);
3801  int DefStage = stageScheduled(DefSU);
3802 
3803  unsigned InitVal = 0;
3804  unsigned LoopVal = 0;
3805  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3806  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3807  if (!UseSU)
3808  return true;
3809  if (UseSU->getInstr()->isPHI())
3810  return true;
3811  unsigned LoopCycle = cycleScheduled(UseSU);
3812  int LoopStage = stageScheduled(UseSU);
3813  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3814 }
3815 
3816 /// Return true if the instruction is a definition that is loop carried
3817 /// and defines the use on the next iteration.
3818 /// v1 = phi(v2, v3)
3819 /// (Def) v3 = op v1
3820 /// (MO) = v1
3821 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3822 /// register.
3823 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3825  if (!MO.isReg())
3826  return false;
3827  if (Def->isPHI())
3828  return false;
3829  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3830  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3831  return false;
3832  if (!isLoopCarried(SSD, *Phi))
3833  return false;
3834  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3835  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3836  MachineOperand &DMO = Def->getOperand(i);
3837  if (!DMO.isReg() || !DMO.isDef())
3838  continue;
3839  if (DMO.getReg() == LoopReg)
3840  return true;
3841  }
3842  return false;
3843 }
3844 
3845 // Check if the generated schedule is valid. This function checks if
3846 // an instruction that uses a physical register is scheduled in a
3847 // different stage than the definition. The pipeliner does not handle
3848 // physical register values that may cross a basic block boundary.
3849 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3850  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3851  SUnit &SU = SSD->SUnits[i];
3852  if (!SU.hasPhysRegDefs)
3853  continue;
3854  int StageDef = stageScheduled(&SU);
3855  assert(StageDef != -1 && "Instruction should have been scheduled.");
3856  for (auto &SI : SU.Succs)
3857  if (SI.isAssignedRegDep())
3858  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3859  if (stageScheduled(SI.getSUnit()) != StageDef)
3860  return false;
3861  }
3862  return true;
3863 }
3864 
3865 /// Attempt to fix the degenerate cases when the instruction serialization
3866 /// causes the register lifetimes to overlap. For example,
3867 /// p' = store_pi(p, b)
3868 /// = load p, offset
3869 /// In this case p and p' overlap, which means that two registers are needed.
3870 /// Instead, this function changes the load to use p' and updates the offset.
3871 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3872  unsigned OverlapReg = 0;
3873  unsigned NewBaseReg = 0;
3874  for (SUnit *SU : Instrs) {
3875  MachineInstr *MI = SU->getInstr();
3876  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3877  const MachineOperand &MO = MI->getOperand(i);
3878  // Look for an instruction that uses p. The instruction occurs in the
3879  // same cycle but occurs later in the serialized order.
3880  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3881  // Check that the instruction appears in the InstrChanges structure,
3882  // which contains instructions that can have the offset updated.
3884  InstrChanges.find(SU);
3885  if (It != InstrChanges.end()) {
3886  unsigned BasePos, OffsetPos;
3887  // Update the base register and adjust the offset.
3888  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3889  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3890  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3891  int64_t NewOffset =
3892  MI->getOperand(OffsetPos).getImm() - It->second.second;
3893  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3894  SU->setInstr(NewMI);
3895  MISUnitMap[NewMI] = SU;
3896  NewMIs.insert(NewMI);
3897  }
3898  }
3899  OverlapReg = 0;
3900  NewBaseReg = 0;
3901  break;
3902  }
3903  // Look for an instruction of the form p' = op(p), which uses and defines
3904  // two virtual registers that get allocated to the same physical register.
3905  unsigned TiedUseIdx = 0;
3906  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3907  // OverlapReg is p in the example above.
3908  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3909  // NewBaseReg is p' in the example above.
3910  NewBaseReg = MI->getOperand(i).getReg();
3911  break;
3912  }
3913  }
3914  }
3915 }
3916 
3917 /// After the schedule has been formed, call this function to combine
3918 /// the instructions from the different stages/cycles. That is, this
3919 /// function creates a schedule that represents a single iteration.
3920 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3921  // Move all instructions to the first stage from later stages.
3922  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3923  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3924  ++stage) {
3925  std::deque<SUnit *> &cycleInstrs =
3926  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3927  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3928  E = cycleInstrs.rend();
3929  I != E; ++I)
3930  ScheduledInstrs[cycle].push_front(*I);
3931  }
3932  }
3933  // Iterate over the definitions in each instruction, and compute the
3934  // stage difference for each use. Keep the maximum value.
3935  for (auto &I : InstrToCycle) {
3936  int DefStage = stageScheduled(I.first);
3937  MachineInstr *MI = I.first->getInstr();
3938  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3939  MachineOperand &Op = MI->getOperand(i);
3940  if (!Op.isReg() || !Op.isDef())
3941  continue;
3942 
3943  unsigned Reg = Op.getReg();
3944  unsigned MaxDiff = 0;
3945  bool PhiIsSwapped = false;
3946  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3947  EI = MRI.use_end();
3948  UI != EI; ++UI) {
3949  MachineOperand &UseOp = *UI;
3950  MachineInstr *UseMI = UseOp.getParent();
3951  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3952  int UseStage = stageScheduled(SUnitUse);
3953  unsigned Diff = 0;
3954  if (UseStage != -1 && UseStage >= DefStage)
3955  Diff = UseStage - DefStage;
3956  if (MI->isPHI()) {
3957  if (isLoopCarried(SSD, *MI))
3958  ++Diff;
3959  else
3960  PhiIsSwapped = true;
3961  }
3962  MaxDiff = std::max(Diff, MaxDiff);
3963  }
3964  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3965  }
3966  }
3967 
3968  // Erase all the elements in the later stages. Only one iteration should
3969  // remain in the scheduled list, and it contains all the instructions.
3970  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3971  ScheduledInstrs.erase(cycle);
3972 
3973  // Change the registers in instruction as specified in the InstrChanges
3974  // map. We need to use the new registers to create the correct order.
3975  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3976  SUnit *SU = &SSD->SUnits[i];
3977  SSD->applyInstrChange(SU->getInstr(), *this);
3978  }
3979 
3980  // Reorder the instructions in each cycle to fix and improve the
3981  // generated code.
3982  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3983  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3984  std::deque<SUnit *> newOrderZC;
3985  // Put the zero-cost, pseudo instructions at the start of the cycle.
3986  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3987  SUnit *SU = cycleInstrs[i];
3988  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3989  orderDependence(SSD, SU, newOrderZC);
3990  }
3991  std::deque<SUnit *> newOrderI;
3992  // Then, add the regular instructions back.
3993  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3994  SUnit *SU = cycleInstrs[i];
3995  if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3996  orderDependence(SSD, SU, newOrderI);
3997  }
3998  // Replace the old order with the new order.
3999  cycleInstrs.swap(newOrderZC);
4000  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
4001  SSD->fixupRegisterOverlaps(cycleInstrs);
4002  }
4003 
4004  DEBUG(dump(););
4005 }
4006 
4007 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4008 /// Print the schedule information to the given output.
4009 void SMSchedule::print(raw_ostream &os) const {
4010  // Iterate over each cycle.
4011  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4012  // Iterate over each instruction in the cycle.
4013  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
4014  for (SUnit *CI : cycleInstrs->second) {
4015  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
4016  os << "(" << CI->NodeNum << ") ";
4017  CI->getInstr()->print(os);
4018  os << "\n";
4019  }
4020  }
4021 }
4022 
4023 /// Utility function used for debugging to print the schedule.
4024 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
4025 #endif
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:755
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:245
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
BitVector & set()
Definition: BitVector.h:398
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:330
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:85
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:461
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:236
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void dump(const ScheduleDAG *G) const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:271
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:403
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isInlineAsm() const
Definition: MachineInstr.h:835
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
The two locations do not alias at all.
Definition: AliasAnalysis.h:85
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:399
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:335
bool isPHI() const
Definition: MachineInstr.h:829
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:265
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:264
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:296
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:222
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
BlockT * getHeader() const
Definition: LoopInfo.h:100
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:425
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:290
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
iterator find(const KeyT &Key)
Definition: MapVector.h:144
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:880
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
TargetInstrInfo - Interface to description of machine instruction set.
const Value * getValue() const
Return the base address of the memory access.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:565
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:642
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base register and byte offset of an instruction that reads/writes memory. ...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
Represent the analysis usage information of a pass.
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:407
BitVector & reset()
Definition: BitVector.h:439
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:854
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:50
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
print lazy value Lazy Value Info Printer Pass
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:512
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:392
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:748
void clear()
Completely clear the SetVector.
Definition: SetVector.h:216
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2)
Return true if Inst1 defines a value that is used in Inst2.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:328
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:267
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:210
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:480
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:142
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1948
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:60
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
Basic Alias true
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
void print(raw_ostream &OS, bool SkipOpers=false, bool SkipDebugLoc=false, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:629
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:68
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
mop_iterator operands_begin()
Definition: MachineInstr.h:329
A vector that has set insertion semantics.
Definition: SetVector.h:41
static use_instr_iterator use_instr_end()
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:200
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1946
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the the loop block.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr&#39;s memory reference descriptor list.
std::vector< MachineBasicBlock * >::iterator succ_iterator
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:393
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:436