LLVM  6.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlap loop iterations and explioits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996
21 // Conference on Parallel Architectures and Compilation Techiniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Chambpain, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and the check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/MapVector.h"
65 #include "llvm/ADT/PriorityQueue.h"
66 #include "llvm/ADT/SetVector.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/Statistic.h"
96 #include "llvm/IR/Attributes.h"
97 #include "llvm/IR/DebugLoc.h"
98 #include "llvm/IR/Function.h"
99 #include "llvm/MC/LaneBitmask.h"
100 #include "llvm/MC/MCInstrDesc.h"
102 #include "llvm/MC/MCRegisterInfo.h"
103 #include "llvm/Pass.h"
105 #include "llvm/Support/Compiler.h"
106 #include "llvm/Support/Debug.h"
107 #include "llvm/Support/MathExtras.h"
109 #include <algorithm>
110 #include <cassert>
111 #include <climits>
112 #include <cstdint>
113 #include <deque>
114 #include <functional>
115 #include <iterator>
116 #include <map>
117 #include <memory>
118 #include <tuple>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 
124 #define DEBUG_TYPE "pipeliner"
125 
126 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
127 STATISTIC(NumPipelined, "Number of loops software pipelined");
128 
129 /// A command line option to turn software pipelining on or off.
130 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
132  cl::desc("Enable Software Pipelining"));
133 
134 /// A command line option to enable SWP at -Os.
135 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
136  cl::desc("Enable SWP at Os."), cl::Hidden,
137  cl::init(false));
138 
139 /// A command line argument to limit minimum initial interval for pipelining.
140 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
141  cl::desc("Size limit for the the MII."),
142  cl::Hidden, cl::init(27));
143 
144 /// A command line argument to limit the number of stages in the pipeline.
145 static cl::opt<int>
146  SwpMaxStages("pipeliner-max-stages",
147  cl::desc("Maximum stages allowed in the generated scheduled."),
148  cl::Hidden, cl::init(3));
149 
150 /// A command line option to disable the pruning of chain dependences due to
151 /// an unrelated Phi.
152 static cl::opt<bool>
153  SwpPruneDeps("pipeliner-prune-deps",
154  cl::desc("Prune dependences between unrelated Phi nodes."),
155  cl::Hidden, cl::init(true));
156 
157 /// A command line option to disable the pruning of loop carried order
158 /// dependences.
159 static cl::opt<bool>
160  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
161  cl::desc("Prune loop carried order dependences."),
162  cl::Hidden, cl::init(true));
163 
164 #ifndef NDEBUG
165 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
166 #endif
167 
168 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
169  cl::ReallyHidden, cl::init(false),
170  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
171 
172 namespace {
173 
174 class NodeSet;
175 class SMSchedule;
176 
177 /// The main class in the implementation of the target independent
178 /// software pipeliner pass.
179 class MachinePipeliner : public MachineFunctionPass {
180 public:
181  MachineFunction *MF = nullptr;
182  const MachineLoopInfo *MLI = nullptr;
183  const MachineDominatorTree *MDT = nullptr;
184  const InstrItineraryData *InstrItins;
185  const TargetInstrInfo *TII = nullptr;
186  RegisterClassInfo RegClassInfo;
187 
188 #ifndef NDEBUG
189  static int NumTries;
190 #endif
191 
192  /// Cache the target analysis information about the loop.
193  struct LoopInfo {
194  MachineBasicBlock *TBB = nullptr;
195  MachineBasicBlock *FBB = nullptr;
197  MachineInstr *LoopInductionVar = nullptr;
198  MachineInstr *LoopCompare = nullptr;
199  };
200  LoopInfo LI;
201 
202  static char ID;
203 
204  MachinePipeliner() : MachineFunctionPass(ID) {
206  }
207 
208  bool runOnMachineFunction(MachineFunction &MF) override;
209 
210  void getAnalysisUsage(AnalysisUsage &AU) const override {
217  }
218 
219 private:
220  bool canPipelineLoop(MachineLoop &L);
221  bool scheduleLoop(MachineLoop &L);
222  bool swingModuloScheduler(MachineLoop &L);
223 };
224 
225 /// This class builds the dependence graph for the instructions in a loop,
226 /// and attempts to schedule the instructions using the SMS algorithm.
227 class SwingSchedulerDAG : public ScheduleDAGInstrs {
228  MachinePipeliner &Pass;
229  /// The minimum initiation interval between iterations for this schedule.
230  unsigned MII = 0;
231  /// Set to true if a valid pipelined schedule is found for the loop.
232  bool Scheduled = false;
233  MachineLoop &Loop;
234  LiveIntervals &LIS;
235  const RegisterClassInfo &RegClassInfo;
236 
237  /// A toplogical ordering of the SUnits, which is needed for changing
238  /// dependences and iterating over the SUnits.
240 
241  struct NodeInfo {
242  int ASAP = 0;
243  int ALAP = 0;
244 
245  NodeInfo() = default;
246  };
247  /// Computed properties for each node in the graph.
248  std::vector<NodeInfo> ScheduleInfo;
249 
250  enum OrderKind { BottomUp = 0, TopDown = 1 };
251  /// Computed node ordering for scheduling.
253 
254  using NodeSetType = SmallVector<NodeSet, 8>;
255  using ValueMapTy = DenseMap<unsigned, unsigned>;
256  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
257  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
258 
259  /// Instructions to change when emitting the final schedule.
261 
262  /// We may create a new instruction, so remember it because it
263  /// must be deleted when the pass is finished.
265 
266  /// Ordered list of DAG postprocessing steps.
267  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
268 
269  /// Helper class to implement Johnson's circuit finding algorithm.
270  class Circuits {
271  std::vector<SUnit> &SUnits;
272  SetVector<SUnit *> Stack;
273  BitVector Blocked;
276  unsigned NumPaths;
277  static unsigned MaxPaths;
278 
279  public:
280  Circuits(std::vector<SUnit> &SUs)
281  : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
282 
283  /// Reset the data structures used in the circuit algorithm.
284  void reset() {
285  Stack.clear();
286  Blocked.reset();
287  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
288  NumPaths = 0;
289  }
290 
291  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
292  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
293  void unblock(int U);
294  };
295 
296 public:
297  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
298  const RegisterClassInfo &rci)
299  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
300  RegClassInfo(rci), Topo(SUnits, &ExitSU) {
301  P.MF->getSubtarget().getSMSMutations(Mutations);
302  }
303 
304  void schedule() override;
305  void finishBlock() override;
306 
307  /// Return true if the loop kernel has been scheduled.
308  bool hasNewSchedule() { return Scheduled; }
309 
310  /// Return the earliest time an instruction may be scheduled.
311  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
312 
313  /// Return the latest time an instruction my be scheduled.
314  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
315 
316  /// The mobility function, which the the number of slots in which
317  /// an instruction may be scheduled.
318  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
319 
320  /// The depth, in the dependence graph, for a node.
321  int getDepth(SUnit *Node) { return Node->getDepth(); }
322 
323  /// The height, in the dependence graph, for a node.
324  int getHeight(SUnit *Node) { return Node->getHeight(); }
325 
326  /// Return true if the dependence is a back-edge in the data dependence graph.
327  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
328  /// using an anti dependence from a Phi to an instruction.
329  bool isBackedge(SUnit *Source, const SDep &Dep) {
330  if (Dep.getKind() != SDep::Anti)
331  return false;
332  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
333  }
334 
335  /// Return true if the dependence is an order dependence between non-Phis.
336  static bool isOrder(SUnit *Source, const SDep &Dep) {
337  if (Dep.getKind() != SDep::Order)
338  return false;
339  return (!Source->getInstr()->isPHI() &&
340  !Dep.getSUnit()->getInstr()->isPHI());
341  }
342 
343  bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
344 
345  /// The latency of the dependence.
346  unsigned getLatency(SUnit *Source, const SDep &Dep) {
347  // Anti dependences represent recurrences, so use the latency of the
348  // instruction on the back-edge.
349  if (Dep.getKind() == SDep::Anti) {
350  if (Source->getInstr()->isPHI())
351  return Dep.getSUnit()->Latency;
352  if (Dep.getSUnit()->getInstr()->isPHI())
353  return Source->Latency;
354  return Dep.getLatency();
355  }
356  return Dep.getLatency();
357  }
358 
359  /// The distance function, which indicates that operation V of iteration I
360  /// depends on operations U of iteration I-distance.
361  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
362  // Instructions that feed a Phi have a distance of 1. Computing larger
363  // values for arrays requires data dependence information.
364  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
365  return 1;
366  return 0;
367  }
368 
369  /// Set the Minimum Initiation Interval for this schedule attempt.
370  void setMII(unsigned mii) { MII = mii; }
371 
372  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
373 
374  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
375 
376  /// Return the new base register that was stored away for the changed
377  /// instruction.
378  unsigned getInstrBaseReg(SUnit *SU) {
380  InstrChanges.find(SU);
381  if (It != InstrChanges.end())
382  return It->second.first;
383  return 0;
384  }
385 
386  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
387  Mutations.push_back(std::move(Mutation));
388  }
389 
390 private:
391  void addLoopCarriedDependences(AliasAnalysis *AA);
392  void updatePhiDependences();
393  void changeDependences();
394  unsigned calculateResMII();
395  unsigned calculateRecMII(NodeSetType &RecNodeSets);
396  void findCircuits(NodeSetType &NodeSets);
397  void fuseRecs(NodeSetType &NodeSets);
398  void removeDuplicateNodes(NodeSetType &NodeSets);
399  void computeNodeFunctions(NodeSetType &NodeSets);
400  void registerPressureFilter(NodeSetType &NodeSets);
401  void colocateNodeSets(NodeSetType &NodeSets);
402  void checkNodeSets(NodeSetType &NodeSets);
403  void groupRemainingNodes(NodeSetType &NodeSets);
404  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
405  SetVector<SUnit *> &NodesAdded);
406  void computeNodeOrder(NodeSetType &NodeSets);
407  bool schedulePipeline(SMSchedule &Schedule);
408  void generatePipelinedLoop(SMSchedule &Schedule);
409  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
410  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
411  MBBVectorTy &PrologBBs);
412  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
413  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
414  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
415  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
416  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
417  SMSchedule &Schedule, ValueMapTy *VRMap,
418  InstrMapTy &InstrMap, unsigned LastStageNum,
419  unsigned CurStageNum, bool IsLast);
420  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
421  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
422  SMSchedule &Schedule, ValueMapTy *VRMap,
423  InstrMapTy &InstrMap, unsigned LastStageNum,
424  unsigned CurStageNum, bool IsLast);
425  void removeDeadInstructions(MachineBasicBlock *KernelBB,
426  MBBVectorTy &EpilogBBs);
427  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
428  SMSchedule &Schedule);
429  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
430  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
431  ValueMapTy *VRMap);
432  bool computeDelta(MachineInstr &MI, unsigned &Delta);
433  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
434  unsigned Num);
435  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
436  unsigned InstStageNum);
437  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
438  unsigned InstStageNum,
439  SMSchedule &Schedule);
440  void updateInstruction(MachineInstr *NewMI, bool LastDef,
441  unsigned CurStageNum, unsigned InstStageNum,
442  SMSchedule &Schedule, ValueMapTy *VRMap);
443  MachineInstr *findDefInLoop(unsigned Reg);
444  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
445  unsigned LoopStage, ValueMapTy *VRMap,
446  MachineBasicBlock *BB);
447  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
448  SMSchedule &Schedule, ValueMapTy *VRMap,
449  InstrMapTy &InstrMap);
450  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
451  InstrMapTy &InstrMap, unsigned CurStageNum,
452  unsigned PhiNum, MachineInstr *Phi,
453  unsigned OldReg, unsigned NewReg,
454  unsigned PrevReg = 0);
455  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
456  unsigned &OffsetPos, unsigned &NewBase,
457  int64_t &NewOffset);
458  void postprocessDAG();
459 };
460 
461 /// A NodeSet contains a set of SUnit DAG nodes with additional information
462 /// that assigns a priority to the set.
463 class NodeSet {
464  SetVector<SUnit *> Nodes;
465  bool HasRecurrence = false;
466  unsigned RecMII = 0;
467  int MaxMOV = 0;
468  int MaxDepth = 0;
469  unsigned Colocate = 0;
470  SUnit *ExceedPressure = nullptr;
471 
472 public:
473  using iterator = SetVector<SUnit *>::const_iterator;
474 
475  NodeSet() = default;
476  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
477 
478  bool insert(SUnit *SU) { return Nodes.insert(SU); }
479 
480  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
481 
482  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
483  return Nodes.remove_if(P);
484  }
485 
486  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
487 
488  bool hasRecurrence() { return HasRecurrence; };
489 
490  unsigned size() const { return Nodes.size(); }
491 
492  bool empty() const { return Nodes.empty(); }
493 
494  SUnit *getNode(unsigned i) const { return Nodes[i]; };
495 
496  void setRecMII(unsigned mii) { RecMII = mii; };
497 
498  void setColocate(unsigned c) { Colocate = c; };
499 
500  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
501 
502  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
503 
504  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
505 
506  int getRecMII() { return RecMII; }
507 
508  /// Summarize node functions for the entire node set.
509  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
510  for (SUnit *SU : *this) {
511  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
512  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
513  }
514  }
515 
516  void clear() {
517  Nodes.clear();
518  RecMII = 0;
519  HasRecurrence = false;
520  MaxMOV = 0;
521  MaxDepth = 0;
522  Colocate = 0;
523  ExceedPressure = nullptr;
524  }
525 
526  operator SetVector<SUnit *> &() { return Nodes; }
527 
528  /// Sort the node sets by importance. First, rank them by recurrence MII,
529  /// then by mobility (least mobile done first), and finally by depth.
530  /// Each node set may contain a colocate value which is used as the first
531  /// tie breaker, if it's set.
532  bool operator>(const NodeSet &RHS) const {
533  if (RecMII == RHS.RecMII) {
534  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
535  return Colocate < RHS.Colocate;
536  if (MaxMOV == RHS.MaxMOV)
537  return MaxDepth > RHS.MaxDepth;
538  return MaxMOV < RHS.MaxMOV;
539  }
540  return RecMII > RHS.RecMII;
541  }
542 
543  bool operator==(const NodeSet &RHS) const {
544  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
545  MaxDepth == RHS.MaxDepth;
546  }
547 
548  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
549 
550  iterator begin() { return Nodes.begin(); }
551  iterator end() { return Nodes.end(); }
552 
553  void print(raw_ostream &os) const {
554  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
555  << " depth " << MaxDepth << " col " << Colocate << "\n";
556  for (const auto &I : Nodes)
557  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
558  os << "\n";
559  }
560 
561 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
563 #endif
564 };
565 
566 /// This class repesents the scheduled code. The main data structure is a
567 /// map from scheduled cycle to instructions. During scheduling, the
568 /// data structure explicitly represents all stages/iterations. When
569 /// the algorithm finshes, the schedule is collapsed into a single stage,
570 /// which represents instructions from different loop iterations.
571 ///
572 /// The SMS algorithm allows negative values for cycles, so the first cycle
573 /// in the schedule is the smallest cycle value.
574 class SMSchedule {
575 private:
576  /// Map from execution cycle to instructions.
577  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
578 
579  /// Map from instruction to execution cycle.
580  std::map<SUnit *, int> InstrToCycle;
581 
582  /// Map for each register and the max difference between its uses and def.
583  /// The first element in the pair is the max difference in stages. The
584  /// second is true if the register defines a Phi value and loop value is
585  /// scheduled before the Phi.
586  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
587 
588  /// Keep track of the first cycle value in the schedule. It starts
589  /// as zero, but the algorithm allows negative values.
590  int FirstCycle = 0;
591 
592  /// Keep track of the last cycle value in the schedule.
593  int LastCycle = 0;
594 
595  /// The initiation interval (II) for the schedule.
596  int InitiationInterval = 0;
597 
598  /// Target machine information.
599  const TargetSubtargetInfo &ST;
600 
601  /// Virtual register information.
603 
604  std::unique_ptr<DFAPacketizer> Resources;
605 
606 public:
607  SMSchedule(MachineFunction *mf)
608  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
609  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
610 
611  void reset() {
612  ScheduledInstrs.clear();
613  InstrToCycle.clear();
614  RegToStageDiff.clear();
615  FirstCycle = 0;
616  LastCycle = 0;
617  InitiationInterval = 0;
618  }
619 
620  /// Set the initiation interval for this schedule.
621  void setInitiationInterval(int ii) { InitiationInterval = ii; }
622 
623  /// Return the first cycle in the completed schedule. This
624  /// can be a negative value.
625  int getFirstCycle() const { return FirstCycle; }
626 
627  /// Return the last cycle in the finalized schedule.
628  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
629 
630  /// Return the cycle of the earliest scheduled instruction in the dependence
631  /// chain.
632  int earliestCycleInChain(const SDep &Dep);
633 
634  /// Return the cycle of the latest scheduled instruction in the dependence
635  /// chain.
636  int latestCycleInChain(const SDep &Dep);
637 
638  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
639  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
640  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
641 
642  /// Iterators for the cycle to instruction map.
643  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
644  using const_sched_iterator =
645  DenseMap<int, std::deque<SUnit *>>::const_iterator;
646 
647  /// Return true if the instruction is scheduled at the specified stage.
648  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
649  return (stageScheduled(SU) == (int)StageNum);
650  }
651 
652  /// Return the stage for a scheduled instruction. Return -1 if
653  /// the instruction has not been scheduled.
654  int stageScheduled(SUnit *SU) const {
655  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
656  if (it == InstrToCycle.end())
657  return -1;
658  return (it->second - FirstCycle) / InitiationInterval;
659  }
660 
661  /// Return the cycle for a scheduled instruction. This function normalizes
662  /// the first cycle to be 0.
663  unsigned cycleScheduled(SUnit *SU) const {
664  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
665  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
666  return (it->second - FirstCycle) % InitiationInterval;
667  }
668 
669  /// Return the maximum stage count needed for this schedule.
670  unsigned getMaxStageCount() {
671  return (LastCycle - FirstCycle) / InitiationInterval;
672  }
673 
674  /// Return the max. number of stages/iterations that can occur between a
675  /// register definition and its uses.
676  unsigned getStagesForReg(int Reg, unsigned CurStage) {
677  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
678  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
679  return 1;
680  return Stages.first;
681  }
682 
683  /// The number of stages for a Phi is a little different than other
684  /// instructions. The minimum value computed in RegToStageDiff is 1
685  /// because we assume the Phi is needed for at least 1 iteration.
686  /// This is not the case if the loop value is scheduled prior to the
687  /// Phi in the same stage. This function returns the number of stages
688  /// or iterations needed between the Phi definition and any uses.
689  unsigned getStagesForPhi(int Reg) {
690  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
691  if (Stages.second)
692  return Stages.first;
693  return Stages.first - 1;
694  }
695 
696  /// Return the instructions that are scheduled at the specified cycle.
697  std::deque<SUnit *> &getInstructions(int cycle) {
698  return ScheduledInstrs[cycle];
699  }
700 
701  bool isValidSchedule(SwingSchedulerDAG *SSD);
702  void finalizeSchedule(SwingSchedulerDAG *SSD);
703  bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
704  std::deque<SUnit *> &Insts);
705  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
706  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
707  MachineOperand &MO);
708  void print(raw_ostream &os) const;
709  void dump() const;
710 };
711 
712 } // end anonymous namespace
713 
714 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
715 char MachinePipeliner::ID = 0;
716 #ifndef NDEBUG
717 int MachinePipeliner::NumTries = 0;
718 #endif
720 
721 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
722  "Modulo Software Pipelining", false, false)
727 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
728  "Modulo Software Pipelining", false, false)
729 
730 /// The "main" function for implementing Swing Modulo Scheduling.
731 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
732  if (skipFunction(*mf.getFunction()))
733  return false;
734 
735  if (!EnableSWP)
736  return false;
737 
738  if (mf.getFunction()->getAttributes().hasAttribute(
739  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
740  !EnableSWPOptSize.getPosition())
741  return false;
742 
743  MF = &mf;
744  MLI = &getAnalysis<MachineLoopInfo>();
745  MDT = &getAnalysis<MachineDominatorTree>();
746  TII = MF->getSubtarget().getInstrInfo();
747  RegClassInfo.runOnMachineFunction(*MF);
748 
749  for (auto &L : *MLI)
750  scheduleLoop(*L);
751 
752  return false;
753 }
754 
755 /// Attempt to perform the SMS algorithm on the specified loop. This function is
756 /// the main entry point for the algorithm. The function identifies candidate
757 /// loops, calculates the minimum initiation interval, and attempts to schedule
758 /// the loop.
759 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
760  bool Changed = false;
761  for (auto &InnerLoop : L)
762  Changed |= scheduleLoop(*InnerLoop);
763 
764 #ifndef NDEBUG
765  // Stop trying after reaching the limit (if any).
766  int Limit = SwpLoopLimit;
767  if (Limit >= 0) {
768  if (NumTries >= SwpLoopLimit)
769  return Changed;
770  NumTries++;
771  }
772 #endif
773 
774  if (!canPipelineLoop(L))
775  return Changed;
776 
777  ++NumTrytoPipeline;
778 
779  Changed = swingModuloScheduler(L);
780 
781  return Changed;
782 }
783 
784 /// Return true if the loop can be software pipelined. The algorithm is
785 /// restricted to loops with a single basic block. Make sure that the
786 /// branch in the loop can be analyzed.
787 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
788  if (L.getNumBlocks() != 1)
789  return false;
790 
791  // Check if the branch can't be understood because we can't do pipelining
792  // if that's the case.
793  LI.TBB = nullptr;
794  LI.FBB = nullptr;
795  LI.BrCond.clear();
796  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
797  return false;
798 
799  LI.LoopInductionVar = nullptr;
800  LI.LoopCompare = nullptr;
801  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
802  return false;
803 
804  if (!L.getLoopPreheader())
805  return false;
806 
807  // If any of the Phis contain subregs, then we can't pipeline
808  // because we don't know how to maintain subreg information in the
809  // VMap structure.
810  MachineBasicBlock *MBB = L.getHeader();
811  for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
812  BBE = MBB->getFirstNonPHI();
813  BBI != BBE; ++BBI)
814  for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
815  if (BBI->getOperand(i).getSubReg() != 0)
816  return false;
817 
818  return true;
819 }
820 
821 /// The SMS algorithm consists of the following main steps:
822 /// 1. Computation and analysis of the dependence graph.
823 /// 2. Ordering of the nodes (instructions).
824 /// 3. Attempt to Schedule the loop.
825 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
826  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
827 
828  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
829 
830  MachineBasicBlock *MBB = L.getHeader();
831  // The kernel should not include any terminator instructions. These
832  // will be added back later.
833  SMS.startBlock(MBB);
834 
835  // Compute the number of 'real' instructions in the basic block by
836  // ignoring terminators.
837  unsigned size = MBB->size();
839  E = MBB->instr_end();
840  I != E; ++I, --size)
841  ;
842 
843  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
844  SMS.schedule();
845  SMS.exitRegion();
846 
847  SMS.finishBlock();
848  return SMS.hasNewSchedule();
849 }
850 
851 /// We override the schedule function in ScheduleDAGInstrs to implement the
852 /// scheduling part of the Swing Modulo Scheduling algorithm.
853 void SwingSchedulerDAG::schedule() {
854  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
855  buildSchedGraph(AA);
856  addLoopCarriedDependences(AA);
857  updatePhiDependences();
859  postprocessDAG();
860  changeDependences();
861  DEBUG({
862  for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
863  SUnits[su].dumpAll(this);
864  });
865 
866  NodeSetType NodeSets;
867  findCircuits(NodeSets);
868 
869  // Calculate the MII.
870  unsigned ResMII = calculateResMII();
871  unsigned RecMII = calculateRecMII(NodeSets);
872 
873  fuseRecs(NodeSets);
874 
875  // This flag is used for testing and can cause correctness problems.
876  if (SwpIgnoreRecMII)
877  RecMII = 0;
878 
879  MII = std::max(ResMII, RecMII);
880  DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
881  << ")\n");
882 
883  // Can't schedule a loop without a valid MII.
884  if (MII == 0)
885  return;
886 
887  // Don't pipeline large loops.
888  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
889  return;
890 
891  computeNodeFunctions(NodeSets);
892 
893  registerPressureFilter(NodeSets);
894 
895  colocateNodeSets(NodeSets);
896 
897  checkNodeSets(NodeSets);
898 
899  DEBUG({
900  for (auto &I : NodeSets) {
901  dbgs() << " Rec NodeSet ";
902  I.dump();
903  }
904  });
905 
906  std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
907 
908  groupRemainingNodes(NodeSets);
909 
910  removeDuplicateNodes(NodeSets);
911 
912  DEBUG({
913  for (auto &I : NodeSets) {
914  dbgs() << " NodeSet ";
915  I.dump();
916  }
917  });
918 
919  computeNodeOrder(NodeSets);
920 
921  SMSchedule Schedule(Pass.MF);
922  Scheduled = schedulePipeline(Schedule);
923 
924  if (!Scheduled)
925  return;
926 
927  unsigned numStages = Schedule.getMaxStageCount();
928  // No need to generate pipeline if there are no overlapped iterations.
929  if (numStages == 0)
930  return;
931 
932  // Check that the maximum stage count is less than user-defined limit.
933  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
934  return;
935 
936  generatePipelinedLoop(Schedule);
937  ++NumPipelined;
938 }
939 
940 /// Clean up after the software pipeliner runs.
941 void SwingSchedulerDAG::finishBlock() {
942  for (MachineInstr *I : NewMIs)
943  MF.DeleteMachineInstr(I);
944  NewMIs.clear();
945 
946  // Call the superclass.
948 }
949 
950 /// Return the register values for the operands of a Phi instruction.
951 /// This function assume the instruction is a Phi.
952 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
953  unsigned &InitVal, unsigned &LoopVal) {
954  assert(Phi.isPHI() && "Expecting a Phi.");
955 
956  InitVal = 0;
957  LoopVal = 0;
958  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
959  if (Phi.getOperand(i + 1).getMBB() != Loop)
960  InitVal = Phi.getOperand(i).getReg();
961  else
962  LoopVal = Phi.getOperand(i).getReg();
963 
964  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
965 }
966 
967 /// Return the Phi register value that comes from the incoming block.
968 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
969  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
970  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
971  return Phi.getOperand(i).getReg();
972  return 0;
973 }
974 
975 /// Return the Phi register value that comes the the loop block.
976 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
977  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
978  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
979  return Phi.getOperand(i).getReg();
980  return 0;
981 }
982 
983 /// Return true if SUb can be reached from SUa following the chain edges.
984 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
985  SmallPtrSet<SUnit *, 8> Visited;
986  SmallVector<SUnit *, 8> Worklist;
987  Worklist.push_back(SUa);
988  while (!Worklist.empty()) {
989  const SUnit *SU = Worklist.pop_back_val();
990  for (auto &SI : SU->Succs) {
991  SUnit *SuccSU = SI.getSUnit();
992  if (SI.getKind() == SDep::Order) {
993  if (Visited.count(SuccSU))
994  continue;
995  if (SuccSU == SUb)
996  return true;
997  Worklist.push_back(SuccSU);
998  Visited.insert(SuccSU);
999  }
1000  }
1001  }
1002  return false;
1003 }
1004 
1005 /// Return true if the instruction causes a chain between memory
1006 /// references before and after it.
1008  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1009  (MI.hasOrderedMemoryRef() &&
1010  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1011 }
1012 
1013 /// Return the underlying objects for the memory references of an instruction.
1014 /// This function calls the code in ValueTracking, but first checks that the
1015 /// instruction has a memory operand.
1018  const DataLayout &DL) {
1019  if (!MI->hasOneMemOperand())
1020  return;
1021  MachineMemOperand *MM = *MI->memoperands_begin();
1022  if (!MM->getValue())
1023  return;
1024  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1025 }
1026 
1027 /// Add a chain edge between a load and store if the store can be an
1028 /// alias of the load on a subsequent iteration, i.e., a loop carried
1029 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1030 /// but that code doesn't create loop carried dependences.
1031 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1033  for (auto &SU : SUnits) {
1034  MachineInstr &MI = *SU.getInstr();
1035  if (isDependenceBarrier(MI, AA))
1036  PendingLoads.clear();
1037  else if (MI.mayLoad()) {
1039  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1040  for (auto V : Objs) {
1041  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1042  SUs.push_back(&SU);
1043  }
1044  } else if (MI.mayStore()) {
1046  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1047  for (auto V : Objs) {
1049  PendingLoads.find(V);
1050  if (I == PendingLoads.end())
1051  continue;
1052  for (auto Load : I->second) {
1053  if (isSuccOrder(Load, &SU))
1054  continue;
1055  MachineInstr &LdMI = *Load->getInstr();
1056  // First, perform the cheaper check that compares the base register.
1057  // If they are the same and the load offset is less than the store
1058  // offset, then mark the dependence as loop carried potentially.
1059  unsigned BaseReg1, BaseReg2;
1060  int64_t Offset1, Offset2;
1061  if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
1062  !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
1064  continue;
1065  }
1066  if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
1067  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1068  "What happened to the chain edge?");
1070  continue;
1071  }
1072  // Second, the more expensive check that uses alias analysis on the
1073  // base registers. If they alias, and the load offset is less than
1074  // the store offset, the mark the dependence as loop carried.
1075  if (!AA) {
1077  continue;
1078  }
1079  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1080  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1081  if (!MMO1->getValue() || !MMO2->getValue()) {
1083  continue;
1084  }
1085  if (MMO1->getValue() == MMO2->getValue() &&
1086  MMO1->getOffset() <= MMO2->getOffset()) {
1088  continue;
1089  }
1090  AliasResult AAResult = AA->alias(
1092  MMO1->getAAInfo()),
1094  MMO2->getAAInfo()));
1095 
1096  if (AAResult != NoAlias)
1098  }
1099  }
1100  }
1101  }
1102 }
1103 
1104 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1105 /// processes dependences for PHIs. This function adds true dependences
1106 /// from a PHI to a use, and a loop carried dependence from the use to the
1107 /// PHI. The loop carried dependence is represented as an anti dependence
1108 /// edge. This function also removes chain dependences between unrelated
1109 /// PHIs.
1110 void SwingSchedulerDAG::updatePhiDependences() {
1111  SmallVector<SDep, 4> RemoveDeps;
1113 
1114  // Iterate over each DAG node.
1115  for (SUnit &I : SUnits) {
1116  RemoveDeps.clear();
1117  // Set to true if the instruction has an operand defined by a Phi.
1118  unsigned HasPhiUse = 0;
1119  unsigned HasPhiDef = 0;
1120  MachineInstr *MI = I.getInstr();
1121  // Iterate over each operand, and we process the definitions.
1122  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1123  MOE = MI->operands_end();
1124  MOI != MOE; ++MOI) {
1125  if (!MOI->isReg())
1126  continue;
1127  unsigned Reg = MOI->getReg();
1128  if (MOI->isDef()) {
1129  // If the register is used by a Phi, then create an anti dependence.
1131  UI = MRI.use_instr_begin(Reg),
1132  UE = MRI.use_instr_end();
1133  UI != UE; ++UI) {
1134  MachineInstr *UseMI = &*UI;
1135  SUnit *SU = getSUnit(UseMI);
1136  if (SU != nullptr && UseMI->isPHI()) {
1137  if (!MI->isPHI()) {
1138  SDep Dep(SU, SDep::Anti, Reg);
1139  I.addPred(Dep);
1140  } else {
1141  HasPhiDef = Reg;
1142  // Add a chain edge to a dependent Phi that isn't an existing
1143  // predecessor.
1144  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1145  I.addPred(SDep(SU, SDep::Barrier));
1146  }
1147  }
1148  }
1149  } else if (MOI->isUse()) {
1150  // If the register is defined by a Phi, then create a true dependence.
1151  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1152  if (DefMI == nullptr)
1153  continue;
1154  SUnit *SU = getSUnit(DefMI);
1155  if (SU != nullptr && DefMI->isPHI()) {
1156  if (!MI->isPHI()) {
1157  SDep Dep(SU, SDep::Data, Reg);
1158  Dep.setLatency(0);
1159  ST.adjustSchedDependency(SU, &I, Dep);
1160  I.addPred(Dep);
1161  } else {
1162  HasPhiUse = Reg;
1163  // Add a chain edge to a dependent Phi that isn't an existing
1164  // predecessor.
1165  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1166  I.addPred(SDep(SU, SDep::Barrier));
1167  }
1168  }
1169  }
1170  }
1171  // Remove order dependences from an unrelated Phi.
1172  if (!SwpPruneDeps)
1173  continue;
1174  for (auto &PI : I.Preds) {
1175  MachineInstr *PMI = PI.getSUnit()->getInstr();
1176  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1177  if (I.getInstr()->isPHI()) {
1178  if (PMI->getOperand(0).getReg() == HasPhiUse)
1179  continue;
1180  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1181  continue;
1182  }
1183  RemoveDeps.push_back(PI);
1184  }
1185  }
1186  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1187  I.removePred(RemoveDeps[i]);
1188  }
1189 }
1190 
1191 /// Iterate over each DAG node and see if we can change any dependences
1192 /// in order to reduce the recurrence MII.
1193 void SwingSchedulerDAG::changeDependences() {
1194  // See if an instruction can use a value from the previous iteration.
1195  // If so, we update the base and offset of the instruction and change
1196  // the dependences.
1197  for (SUnit &I : SUnits) {
1198  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1199  int64_t NewOffset = 0;
1200  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1201  NewOffset))
1202  continue;
1203 
1204  // Get the MI and SUnit for the instruction that defines the original base.
1205  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1206  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1207  if (!DefMI)
1208  continue;
1209  SUnit *DefSU = getSUnit(DefMI);
1210  if (!DefSU)
1211  continue;
1212  // Get the MI and SUnit for the instruction that defins the new base.
1213  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1214  if (!LastMI)
1215  continue;
1216  SUnit *LastSU = getSUnit(LastMI);
1217  if (!LastSU)
1218  continue;
1219 
1220  if (Topo.IsReachable(&I, LastSU))
1221  continue;
1222 
1223  // Remove the dependence. The value now depends on a prior iteration.
1224  SmallVector<SDep, 4> Deps;
1225  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1226  ++P)
1227  if (P->getSUnit() == DefSU)
1228  Deps.push_back(*P);
1229  for (int i = 0, e = Deps.size(); i != e; i++) {
1230  Topo.RemovePred(&I, Deps[i].getSUnit());
1231  I.removePred(Deps[i]);
1232  }
1233  // Remove the chain dependence between the instructions.
1234  Deps.clear();
1235  for (auto &P : LastSU->Preds)
1236  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1237  Deps.push_back(P);
1238  for (int i = 0, e = Deps.size(); i != e; i++) {
1239  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1240  LastSU->removePred(Deps[i]);
1241  }
1242 
1243  // Add a dependence between the new instruction and the instruction
1244  // that defines the new base.
1245  SDep Dep(&I, SDep::Anti, NewBase);
1246  LastSU->addPred(Dep);
1247 
1248  // Remember the base and offset information so that we can update the
1249  // instruction during code generation.
1250  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1251  }
1252 }
1253 
1254 namespace {
1255 
1256 // FuncUnitSorter - Comparison operator used to sort instructions by
1257 // the number of functional unit choices.
1258 struct FuncUnitSorter {
1259  const InstrItineraryData *InstrItins;
1260  DenseMap<unsigned, unsigned> Resources;
1261 
1262  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1263 
1264  // Compute the number of functional unit alternatives needed
1265  // at each stage, and take the minimum value. We prioritize the
1266  // instructions by the least number of choices first.
1267  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1268  unsigned schedClass = Inst->getDesc().getSchedClass();
1269  unsigned min = UINT_MAX;
1270  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1271  *IE = InstrItins->endStage(schedClass);
1272  IS != IE; ++IS) {
1273  unsigned funcUnits = IS->getUnits();
1274  unsigned numAlternatives = countPopulation(funcUnits);
1275  if (numAlternatives < min) {
1276  min = numAlternatives;
1277  F = funcUnits;
1278  }
1279  }
1280  return min;
1281  }
1282 
1283  // Compute the critical resources needed by the instruction. This
1284  // function records the functional units needed by instructions that
1285  // must use only one functional unit. We use this as a tie breaker
1286  // for computing the resource MII. The instrutions that require
1287  // the same, highly used, functional unit have high priority.
1288  void calcCriticalResources(MachineInstr &MI) {
1289  unsigned SchedClass = MI.getDesc().getSchedClass();
1290  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1291  *IE = InstrItins->endStage(SchedClass);
1292  IS != IE; ++IS) {
1293  unsigned FuncUnits = IS->getUnits();
1294  if (countPopulation(FuncUnits) == 1)
1295  Resources[FuncUnits]++;
1296  }
1297  }
1298 
1299  /// Return true if IS1 has less priority than IS2.
1300  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1301  unsigned F1 = 0, F2 = 0;
1302  unsigned MFUs1 = minFuncUnits(IS1, F1);
1303  unsigned MFUs2 = minFuncUnits(IS2, F2);
1304  if (MFUs1 == 1 && MFUs2 == 1)
1305  return Resources.lookup(F1) < Resources.lookup(F2);
1306  return MFUs1 > MFUs2;
1307  }
1308 };
1309 
1310 } // end anonymous namespace
1311 
1312 /// Calculate the resource constrained minimum initiation interval for the
1313 /// specified loop. We use the DFA to model the resources needed for
1314 /// each instruction, and we ignore dependences. A different DFA is created
1315 /// for each cycle that is required. When adding a new instruction, we attempt
1316 /// to add it to each existing DFA, until a legal space is found. If the
1317 /// instruction cannot be reserved in an existing DFA, we create a new one.
1318 unsigned SwingSchedulerDAG::calculateResMII() {
1320  MachineBasicBlock *MBB = Loop.getHeader();
1321  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1322 
1323  // Sort the instructions by the number of available choices for scheduling,
1324  // least to most. Use the number of critical resources as the tie breaker.
1325  FuncUnitSorter FUS =
1326  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1328  E = MBB->getFirstTerminator();
1329  I != E; ++I)
1330  FUS.calcCriticalResources(*I);
1332  FuncUnitOrder(FUS);
1333 
1335  E = MBB->getFirstTerminator();
1336  I != E; ++I)
1337  FuncUnitOrder.push(&*I);
1338 
1339  while (!FuncUnitOrder.empty()) {
1340  MachineInstr *MI = FuncUnitOrder.top();
1341  FuncUnitOrder.pop();
1342  if (TII->isZeroCost(MI->getOpcode()))
1343  continue;
1344  // Attempt to reserve the instruction in an existing DFA. At least one
1345  // DFA is needed for each cycle.
1346  unsigned NumCycles = getSUnit(MI)->Latency;
1347  unsigned ReservedCycles = 0;
1350  for (unsigned C = 0; C < NumCycles; ++C)
1351  while (RI != RE) {
1352  if ((*RI++)->canReserveResources(*MI)) {
1353  ++ReservedCycles;
1354  break;
1355  }
1356  }
1357  // Start reserving resources using existing DFAs.
1358  for (unsigned C = 0; C < ReservedCycles; ++C) {
1359  --RI;
1360  (*RI)->reserveResources(*MI);
1361  }
1362  // Add new DFAs, if needed, to reserve resources.
1363  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1364  DFAPacketizer *NewResource =
1366  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1367  NewResource->reserveResources(*MI);
1368  Resources.push_back(NewResource);
1369  }
1370  }
1371  int Resmii = Resources.size();
1372  // Delete the memory for each of the DFAs that were created earlier.
1373  for (DFAPacketizer *RI : Resources) {
1374  DFAPacketizer *D = RI;
1375  delete D;
1376  }
1377  Resources.clear();
1378  return Resmii;
1379 }
1380 
1381 /// Calculate the recurrence-constrainted minimum initiation interval.
1382 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1383 /// for each circuit. The II needs to satisfy the inequality
1384 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1385 /// II that satistifies the inequality, and the RecMII is the maximum
1386 /// of those values.
1387 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1388  unsigned RecMII = 0;
1389 
1390  for (NodeSet &Nodes : NodeSets) {
1391  if (Nodes.empty())
1392  continue;
1393 
1394  unsigned Delay = Nodes.size() - 1;
1395  unsigned Distance = 1;
1396 
1397  // ii = ceil(delay / distance)
1398  unsigned CurMII = (Delay + Distance - 1) / Distance;
1399  Nodes.setRecMII(CurMII);
1400  if (CurMII > RecMII)
1401  RecMII = CurMII;
1402  }
1403 
1404  return RecMII;
1405 }
1406 
1407 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1408 /// but we do this to find the circuits, and then change them back.
1409 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1411  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1412  SUnit *SU = &SUnits[i];
1413  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1414  IP != EP; ++IP) {
1415  if (IP->getKind() != SDep::Anti)
1416  continue;
1417  DepsAdded.push_back(std::make_pair(SU, *IP));
1418  }
1419  }
1420  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1421  E = DepsAdded.end();
1422  I != E; ++I) {
1423  // Remove this anti dependency and add one in the reverse direction.
1424  SUnit *SU = I->first;
1425  SDep &D = I->second;
1426  SUnit *TargetSU = D.getSUnit();
1427  unsigned Reg = D.getReg();
1428  unsigned Lat = D.getLatency();
1429  SU->removePred(D);
1430  SDep Dep(SU, SDep::Anti, Reg);
1431  Dep.setLatency(Lat);
1432  TargetSU->addPred(Dep);
1433  }
1434 }
1435 
1436 /// Create the adjacency structure of the nodes in the graph.
1437 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1438  SwingSchedulerDAG *DAG) {
1439  BitVector Added(SUnits.size());
1440  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1441  Added.reset();
1442  // Add any successor to the adjacency matrix and exclude duplicates.
1443  for (auto &SI : SUnits[i].Succs) {
1444  // Do not process a boundary node and a back-edge is processed only
1445  // if it goes to a Phi.
1446  if (SI.getSUnit()->isBoundaryNode() ||
1447  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1448  continue;
1449  int N = SI.getSUnit()->NodeNum;
1450  if (!Added.test(N)) {
1451  AdjK[i].push_back(N);
1452  Added.set(N);
1453  }
1454  }
1455  // A chain edge between a store and a load is treated as a back-edge in the
1456  // adjacency matrix.
1457  for (auto &PI : SUnits[i].Preds) {
1458  if (!SUnits[i].getInstr()->mayStore() ||
1459  !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
1460  continue;
1461  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1462  int N = PI.getSUnit()->NodeNum;
1463  if (!Added.test(N)) {
1464  AdjK[i].push_back(N);
1465  Added.set(N);
1466  }
1467  }
1468  }
1469  }
1470 }
1471 
1472 /// Identify an elementary circuit in the dependence graph starting at the
1473 /// specified node.
1474 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1475  bool HasBackedge) {
1476  SUnit *SV = &SUnits[V];
1477  bool F = false;
1478  Stack.insert(SV);
1479  Blocked.set(V);
1480 
1481  for (auto W : AdjK[V]) {
1482  if (NumPaths > MaxPaths)
1483  break;
1484  if (W < S)
1485  continue;
1486  if (W == S) {
1487  if (!HasBackedge)
1488  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1489  F = true;
1490  ++NumPaths;
1491  break;
1492  } else if (!Blocked.test(W)) {
1493  if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
1494  F = true;
1495  }
1496  }
1497 
1498  if (F)
1499  unblock(V);
1500  else {
1501  for (auto W : AdjK[V]) {
1502  if (W < S)
1503  continue;
1504  if (B[W].count(SV) == 0)
1505  B[W].insert(SV);
1506  }
1507  }
1508  Stack.pop_back();
1509  return F;
1510 }
1511 
1512 /// Unblock a node in the circuit finding algorithm.
1513 void SwingSchedulerDAG::Circuits::unblock(int U) {
1514  Blocked.reset(U);
1515  SmallPtrSet<SUnit *, 4> &BU = B[U];
1516  while (!BU.empty()) {
1518  assert(SI != BU.end() && "Invalid B set.");
1519  SUnit *W = *SI;
1520  BU.erase(W);
1521  if (Blocked.test(W->NodeNum))
1522  unblock(W->NodeNum);
1523  }
1524 }
1525 
1526 /// Identify all the elementary circuits in the dependence graph using
1527 /// Johnson's circuit algorithm.
1528 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1529  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1530  // but we do this to find the circuits, and then change them back.
1531  swapAntiDependences(SUnits);
1532 
1533  Circuits Cir(SUnits);
1534  // Create the adjacency structure.
1535  Cir.createAdjacencyStructure(this);
1536  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1537  Cir.reset();
1538  Cir.circuit(i, i, NodeSets);
1539  }
1540 
1541  // Change the dependences back so that we've created a DAG again.
1542  swapAntiDependences(SUnits);
1543 }
1544 
1545 /// Return true for DAG nodes that we ignore when computing the cost functions.
1546 /// We ignore the back-edge recurrence in order to avoid unbounded recurison
1547 /// in the calculation of the ASAP, ALAP, etc functions.
1548 static bool ignoreDependence(const SDep &D, bool isPred) {
1549  if (D.isArtificial())
1550  return true;
1551  return D.getKind() == SDep::Anti && isPred;
1552 }
1553 
1554 /// Compute several functions need to order the nodes for scheduling.
1555 /// ASAP - Earliest time to schedule a node.
1556 /// ALAP - Latest time to schedule a node.
1557 /// MOV - Mobility function, difference between ALAP and ASAP.
1558 /// D - Depth of each node.
1559 /// H - Height of each node.
1560 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1561  ScheduleInfo.resize(SUnits.size());
1562 
1563  DEBUG({
1565  E = Topo.end();
1566  I != E; ++I) {
1567  SUnit *SU = &SUnits[*I];
1568  SU->dump(this);
1569  }
1570  });
1571 
1572  int maxASAP = 0;
1573  // Compute ASAP.
1575  E = Topo.end();
1576  I != E; ++I) {
1577  int asap = 0;
1578  SUnit *SU = &SUnits[*I];
1579  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1580  EP = SU->Preds.end();
1581  IP != EP; ++IP) {
1582  if (ignoreDependence(*IP, true))
1583  continue;
1584  SUnit *pred = IP->getSUnit();
1585  asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
1586  getDistance(pred, SU, *IP) * MII));
1587  }
1588  maxASAP = std::max(maxASAP, asap);
1589  ScheduleInfo[*I].ASAP = asap;
1590  }
1591 
1592  // Compute ALAP and MOV.
1594  E = Topo.rend();
1595  I != E; ++I) {
1596  int alap = maxASAP;
1597  SUnit *SU = &SUnits[*I];
1598  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1599  ES = SU->Succs.end();
1600  IS != ES; ++IS) {
1601  if (ignoreDependence(*IS, true))
1602  continue;
1603  SUnit *succ = IS->getSUnit();
1604  alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
1605  getDistance(SU, succ, *IS) * MII));
1606  }
1607 
1608  ScheduleInfo[*I].ALAP = alap;
1609  }
1610 
1611  // After computing the node functions, compute the summary for each node set.
1612  for (NodeSet &I : NodeSets)
1613  I.computeNodeSetInfo(this);
1614 
1615  DEBUG({
1616  for (unsigned i = 0; i < SUnits.size(); i++) {
1617  dbgs() << "\tNode " << i << ":\n";
1618  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1619  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1620  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1621  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1622  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1623  }
1624  });
1625 }
1626 
1627 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1628 /// as the predecessors of the elements of NodeOrder that are not also in
1629 /// NodeOrder.
1630 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1632  const NodeSet *S = nullptr) {
1633  Preds.clear();
1634  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1635  I != E; ++I) {
1636  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1637  PI != PE; ++PI) {
1638  if (S && S->count(PI->getSUnit()) == 0)
1639  continue;
1640  if (ignoreDependence(*PI, true))
1641  continue;
1642  if (NodeOrder.count(PI->getSUnit()) == 0)
1643  Preds.insert(PI->getSUnit());
1644  }
1645  // Back-edges are predecessors with an anti-dependence.
1646  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1647  ES = (*I)->Succs.end();
1648  IS != ES; ++IS) {
1649  if (IS->getKind() != SDep::Anti)
1650  continue;
1651  if (S && S->count(IS->getSUnit()) == 0)
1652  continue;
1653  if (NodeOrder.count(IS->getSUnit()) == 0)
1654  Preds.insert(IS->getSUnit());
1655  }
1656  }
1657  return !Preds.empty();
1658 }
1659 
1660 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1661 /// as the successors of the elements of NodeOrder that are not also in
1662 /// NodeOrder.
1663 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1665  const NodeSet *S = nullptr) {
1666  Succs.clear();
1667  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1668  I != E; ++I) {
1669  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1670  SI != SE; ++SI) {
1671  if (S && S->count(SI->getSUnit()) == 0)
1672  continue;
1673  if (ignoreDependence(*SI, false))
1674  continue;
1675  if (NodeOrder.count(SI->getSUnit()) == 0)
1676  Succs.insert(SI->getSUnit());
1677  }
1678  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1679  PE = (*I)->Preds.end();
1680  PI != PE; ++PI) {
1681  if (PI->getKind() != SDep::Anti)
1682  continue;
1683  if (S && S->count(PI->getSUnit()) == 0)
1684  continue;
1685  if (NodeOrder.count(PI->getSUnit()) == 0)
1686  Succs.insert(PI->getSUnit());
1687  }
1688  }
1689  return !Succs.empty();
1690 }
1691 
1692 /// Return true if there is a path from the specified node to any of the nodes
1693 /// in DestNodes. Keep track and return the nodes in any path.
1694 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1695  SetVector<SUnit *> &DestNodes,
1696  SetVector<SUnit *> &Exclude,
1697  SmallPtrSet<SUnit *, 8> &Visited) {
1698  if (Cur->isBoundaryNode())
1699  return false;
1700  if (Exclude.count(Cur) != 0)
1701  return false;
1702  if (DestNodes.count(Cur) != 0)
1703  return true;
1704  if (!Visited.insert(Cur).second)
1705  return Path.count(Cur) != 0;
1706  bool FoundPath = false;
1707  for (auto &SI : Cur->Succs)
1708  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1709  for (auto &PI : Cur->Preds)
1710  if (PI.getKind() == SDep::Anti)
1711  FoundPath |=
1712  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1713  if (FoundPath)
1714  Path.insert(Cur);
1715  return FoundPath;
1716 }
1717 
1718 /// Return true if Set1 is a subset of Set2.
1719 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1720  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1721  if (Set2.count(*I) == 0)
1722  return false;
1723  return true;
1724 }
1725 
1726 /// Compute the live-out registers for the instructions in a node-set.
1727 /// The live-out registers are those that are defined in the node-set,
1728 /// but not used. Except for use operands of Phis.
1730  NodeSet &NS) {
1731  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1732  MachineRegisterInfo &MRI = MF.getRegInfo();
1734  SmallSet<unsigned, 4> Uses;
1735  for (SUnit *SU : NS) {
1736  const MachineInstr *MI = SU->getInstr();
1737  if (MI->isPHI())
1738  continue;
1739  for (const MachineOperand &MO : MI->operands())
1740  if (MO.isReg() && MO.isUse()) {
1741  unsigned Reg = MO.getReg();
1743  Uses.insert(Reg);
1744  else if (MRI.isAllocatable(Reg))
1745  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1746  Uses.insert(*Units);
1747  }
1748  }
1749  for (SUnit *SU : NS)
1750  for (const MachineOperand &MO : SU->getInstr()->operands())
1751  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1752  unsigned Reg = MO.getReg();
1754  if (!Uses.count(Reg))
1755  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1757  } else if (MRI.isAllocatable(Reg)) {
1758  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1759  if (!Uses.count(*Units))
1760  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1762  }
1763  }
1764  RPTracker.addLiveRegs(LiveOutRegs);
1765 }
1766 
1767 /// A heuristic to filter nodes in recurrent node-sets if the register
1768 /// pressure of a set is too high.
1769 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1770  for (auto &NS : NodeSets) {
1771  // Skip small node-sets since they won't cause register pressure problems.
1772  if (NS.size() <= 2)
1773  continue;
1774  IntervalPressure RecRegPressure;
1775  RegPressureTracker RecRPTracker(RecRegPressure);
1776  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1777  computeLiveOuts(MF, RecRPTracker, NS);
1778  RecRPTracker.closeBottom();
1779 
1780  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1781  std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
1782  return A->NodeNum > B->NodeNum;
1783  });
1784 
1785  for (auto &SU : SUnits) {
1786  // Since we're computing the register pressure for a subset of the
1787  // instructions in a block, we need to set the tracker for each
1788  // instruction in the node-set. The tracker is set to the instruction
1789  // just after the one we're interested in.
1791  RecRPTracker.setPos(std::next(CurInstI));
1792 
1793  RegPressureDelta RPDelta;
1794  ArrayRef<PressureChange> CriticalPSets;
1795  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1796  CriticalPSets,
1797  RecRegPressure.MaxSetPressure);
1798  if (RPDelta.Excess.isValid()) {
1799  DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1800  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1801  << ":" << RPDelta.Excess.getUnitInc());
1802  NS.setExceedPressure(SU);
1803  break;
1804  }
1805  RecRPTracker.recede();
1806  }
1807  }
1808 }
1809 
1810 /// A heuristic to colocate node sets that have the same set of
1811 /// successors.
1812 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1813  unsigned Colocate = 0;
1814  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1815  NodeSet &N1 = NodeSets[i];
1817  if (N1.empty() || !succ_L(N1, S1))
1818  continue;
1819  for (int j = i + 1; j < e; ++j) {
1820  NodeSet &N2 = NodeSets[j];
1821  if (N1.compareRecMII(N2) != 0)
1822  continue;
1824  if (N2.empty() || !succ_L(N2, S2))
1825  continue;
1826  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1827  N1.setColocate(++Colocate);
1828  N2.setColocate(Colocate);
1829  break;
1830  }
1831  }
1832  }
1833 }
1834 
1835 /// Check if the existing node-sets are profitable. If not, then ignore the
1836 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1837 /// a heuristic. If the MII is large and there is a non-recurrent node with
1838 /// a large depth compared to the MII, then it's best to try and schedule
1839 /// all instruction together instead of starting with the recurrent node-sets.
1840 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1841  // Look for loops with a large MII.
1842  if (MII <= 20)
1843  return;
1844  // Check if the node-set contains only a simple add recurrence.
1845  for (auto &NS : NodeSets)
1846  if (NS.size() > 2)
1847  return;
1848  // If the depth of any instruction is significantly larger than the MII, then
1849  // ignore the recurrent node-sets and treat all instructions equally.
1850  for (auto &SU : SUnits)
1851  if (SU.getDepth() > MII * 1.5) {
1852  NodeSets.clear();
1853  DEBUG(dbgs() << "Clear recurrence node-sets\n");
1854  return;
1855  }
1856 }
1857 
1858 /// Add the nodes that do not belong to a recurrence set into groups
1859 /// based upon connected componenets.
1860 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1861  SetVector<SUnit *> NodesAdded;
1862  SmallPtrSet<SUnit *, 8> Visited;
1863  // Add the nodes that are on a path between the previous node sets and
1864  // the current node set.
1865  for (NodeSet &I : NodeSets) {
1867  // Add the nodes from the current node set to the previous node set.
1868  if (succ_L(I, N)) {
1869  SetVector<SUnit *> Path;
1870  for (SUnit *NI : N) {
1871  Visited.clear();
1872  computePath(NI, Path, NodesAdded, I, Visited);
1873  }
1874  if (!Path.empty())
1875  I.insert(Path.begin(), Path.end());
1876  }
1877  // Add the nodes from the previous node set to the current node set.
1878  N.clear();
1879  if (succ_L(NodesAdded, N)) {
1880  SetVector<SUnit *> Path;
1881  for (SUnit *NI : N) {
1882  Visited.clear();
1883  computePath(NI, Path, I, NodesAdded, Visited);
1884  }
1885  if (!Path.empty())
1886  I.insert(Path.begin(), Path.end());
1887  }
1888  NodesAdded.insert(I.begin(), I.end());
1889  }
1890 
1891  // Create a new node set with the connected nodes of any successor of a node
1892  // in a recurrent set.
1893  NodeSet NewSet;
1895  if (succ_L(NodesAdded, N))
1896  for (SUnit *I : N)
1897  addConnectedNodes(I, NewSet, NodesAdded);
1898  if (!NewSet.empty())
1899  NodeSets.push_back(NewSet);
1900 
1901  // Create a new node set with the connected nodes of any predecessor of a node
1902  // in a recurrent set.
1903  NewSet.clear();
1904  if (pred_L(NodesAdded, N))
1905  for (SUnit *I : N)
1906  addConnectedNodes(I, NewSet, NodesAdded);
1907  if (!NewSet.empty())
1908  NodeSets.push_back(NewSet);
1909 
1910  // Create new nodes sets with the connected nodes any any remaining node that
1911  // has no predecessor.
1912  for (unsigned i = 0; i < SUnits.size(); ++i) {
1913  SUnit *SU = &SUnits[i];
1914  if (NodesAdded.count(SU) == 0) {
1915  NewSet.clear();
1916  addConnectedNodes(SU, NewSet, NodesAdded);
1917  if (!NewSet.empty())
1918  NodeSets.push_back(NewSet);
1919  }
1920  }
1921 }
1922 
1923 /// Add the node to the set, and add all is its connected nodes to the set.
1924 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1925  SetVector<SUnit *> &NodesAdded) {
1926  NewSet.insert(SU);
1927  NodesAdded.insert(SU);
1928  for (auto &SI : SU->Succs) {
1929  SUnit *Successor = SI.getSUnit();
1930  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1931  addConnectedNodes(Successor, NewSet, NodesAdded);
1932  }
1933  for (auto &PI : SU->Preds) {
1934  SUnit *Predecessor = PI.getSUnit();
1935  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1936  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1937  }
1938 }
1939 
1940 /// Return true if Set1 contains elements in Set2. The elements in common
1941 /// are returned in a different container.
1942 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1943  SmallSetVector<SUnit *, 8> &Result) {
1944  Result.clear();
1945  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1946  SUnit *SU = Set1[i];
1947  if (Set2.count(SU) != 0)
1948  Result.insert(SU);
1949  }
1950  return !Result.empty();
1951 }
1952 
1953 /// Merge the recurrence node sets that have the same initial node.
1954 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1955  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1956  ++I) {
1957  NodeSet &NI = *I;
1958  for (NodeSetType::iterator J = I + 1; J != E;) {
1959  NodeSet &NJ = *J;
1960  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1961  if (NJ.compareRecMII(NI) > 0)
1962  NI.setRecMII(NJ.getRecMII());
1963  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1964  ++NII)
1965  I->insert(*NII);
1966  NodeSets.erase(J);
1967  E = NodeSets.end();
1968  } else {
1969  ++J;
1970  }
1971  }
1972  }
1973 }
1974 
1975 /// Remove nodes that have been scheduled in previous NodeSets.
1976 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1977  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1978  ++I)
1979  for (NodeSetType::iterator J = I + 1; J != E;) {
1980  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1981 
1982  if (J->empty()) {
1983  NodeSets.erase(J);
1984  E = NodeSets.end();
1985  } else {
1986  ++J;
1987  }
1988  }
1989 }
1990 
1991 /// Return true if Inst1 defines a value that is used in Inst2.
1992 static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
1993  for (auto &SI : Inst1->Succs)
1994  if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
1995  return true;
1996  return false;
1997 }
1998 
1999 /// Compute an ordered list of the dependence graph nodes, which
2000 /// indicates the order that the nodes will be scheduled. This is a
2001 /// two-level algorithm. First, a partial order is created, which
2002 /// consists of a list of sets ordered from highest to lowest priority.
2003 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2005  NodeOrder.clear();
2006 
2007  for (auto &Nodes : NodeSets) {
2008  DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2009  OrderKind Order;
2011  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2012  R.insert(N.begin(), N.end());
2013  Order = BottomUp;
2014  DEBUG(dbgs() << " Bottom up (preds) ");
2015  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2016  R.insert(N.begin(), N.end());
2017  Order = TopDown;
2018  DEBUG(dbgs() << " Top down (succs) ");
2019  } else if (isIntersect(N, Nodes, R)) {
2020  // If some of the successors are in the existing node-set, then use the
2021  // top-down ordering.
2022  Order = TopDown;
2023  DEBUG(dbgs() << " Top down (intersect) ");
2024  } else if (NodeSets.size() == 1) {
2025  for (auto &N : Nodes)
2026  if (N->Succs.size() == 0)
2027  R.insert(N);
2028  Order = BottomUp;
2029  DEBUG(dbgs() << " Bottom up (all) ");
2030  } else {
2031  // Find the node with the highest ASAP.
2032  SUnit *maxASAP = nullptr;
2033  for (SUnit *SU : Nodes) {
2034  if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
2035  maxASAP = SU;
2036  }
2037  R.insert(maxASAP);
2038  Order = BottomUp;
2039  DEBUG(dbgs() << " Bottom up (default) ");
2040  }
2041 
2042  while (!R.empty()) {
2043  if (Order == TopDown) {
2044  // Choose the node with the maximum height. If more than one, choose
2045  // the node with the lowest MOV. If still more than one, check if there
2046  // is a dependence between the instructions.
2047  while (!R.empty()) {
2048  SUnit *maxHeight = nullptr;
2049  for (SUnit *I : R) {
2050  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2051  maxHeight = I;
2052  else if (getHeight(I) == getHeight(maxHeight) &&
2053  getMOV(I) < getMOV(maxHeight) &&
2054  !hasDataDependence(maxHeight, I))
2055  maxHeight = I;
2056  else if (hasDataDependence(I, maxHeight))
2057  maxHeight = I;
2058  }
2059  NodeOrder.insert(maxHeight);
2060  DEBUG(dbgs() << maxHeight->NodeNum << " ");
2061  R.remove(maxHeight);
2062  for (const auto &I : maxHeight->Succs) {
2063  if (Nodes.count(I.getSUnit()) == 0)
2064  continue;
2065  if (NodeOrder.count(I.getSUnit()) != 0)
2066  continue;
2067  if (ignoreDependence(I, false))
2068  continue;
2069  R.insert(I.getSUnit());
2070  }
2071  // Back-edges are predecessors with an anti-dependence.
2072  for (const auto &I : maxHeight->Preds) {
2073  if (I.getKind() != SDep::Anti)
2074  continue;
2075  if (Nodes.count(I.getSUnit()) == 0)
2076  continue;
2077  if (NodeOrder.count(I.getSUnit()) != 0)
2078  continue;
2079  R.insert(I.getSUnit());
2080  }
2081  }
2082  Order = BottomUp;
2083  DEBUG(dbgs() << "\n Switching order to bottom up ");
2085  if (pred_L(NodeOrder, N, &Nodes))
2086  R.insert(N.begin(), N.end());
2087  } else {
2088  // Choose the node with the maximum depth. If more than one, choose
2089  // the node with the lowest MOV. If there is still more than one, check
2090  // for a dependence between the instructions.
2091  while (!R.empty()) {
2092  SUnit *maxDepth = nullptr;
2093  for (SUnit *I : R) {
2094  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2095  maxDepth = I;
2096  else if (getDepth(I) == getDepth(maxDepth) &&
2097  getMOV(I) < getMOV(maxDepth) &&
2098  !hasDataDependence(I, maxDepth))
2099  maxDepth = I;
2100  else if (hasDataDependence(maxDepth, I))
2101  maxDepth = I;
2102  }
2103  NodeOrder.insert(maxDepth);
2104  DEBUG(dbgs() << maxDepth->NodeNum << " ");
2105  R.remove(maxDepth);
2106  if (Nodes.isExceedSU(maxDepth)) {
2107  Order = TopDown;
2108  R.clear();
2109  R.insert(Nodes.getNode(0));
2110  break;
2111  }
2112  for (const auto &I : maxDepth->Preds) {
2113  if (Nodes.count(I.getSUnit()) == 0)
2114  continue;
2115  if (NodeOrder.count(I.getSUnit()) != 0)
2116  continue;
2117  if (I.getKind() == SDep::Anti)
2118  continue;
2119  R.insert(I.getSUnit());
2120  }
2121  // Back-edges are predecessors with an anti-dependence.
2122  for (const auto &I : maxDepth->Succs) {
2123  if (I.getKind() != SDep::Anti)
2124  continue;
2125  if (Nodes.count(I.getSUnit()) == 0)
2126  continue;
2127  if (NodeOrder.count(I.getSUnit()) != 0)
2128  continue;
2129  R.insert(I.getSUnit());
2130  }
2131  }
2132  Order = TopDown;
2133  DEBUG(dbgs() << "\n Switching order to top down ");
2135  if (succ_L(NodeOrder, N, &Nodes))
2136  R.insert(N.begin(), N.end());
2137  }
2138  }
2139  DEBUG(dbgs() << "\nDone with Nodeset\n");
2140  }
2141 
2142  DEBUG({
2143  dbgs() << "Node order: ";
2144  for (SUnit *I : NodeOrder)
2145  dbgs() << " " << I->NodeNum << " ";
2146  dbgs() << "\n";
2147  });
2148 }
2149 
2150 /// Process the nodes in the computed order and create the pipelined schedule
2151 /// of the instructions, if possible. Return true if a schedule is found.
2152 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2153  if (NodeOrder.empty())
2154  return false;
2155 
2156  bool scheduleFound = false;
2157  // Keep increasing II until a valid schedule is found.
2158  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2159  Schedule.reset();
2160  Schedule.setInitiationInterval(II);
2161  DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2162 
2163  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2164  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2165  do {
2166  SUnit *SU = *NI;
2167 
2168  // Compute the schedule time for the instruction, which is based
2169  // upon the scheduled time for any predecessors/successors.
2170  int EarlyStart = INT_MIN;
2171  int LateStart = INT_MAX;
2172  // These values are set when the size of the schedule window is limited
2173  // due to chain dependences.
2174  int SchedEnd = INT_MAX;
2175  int SchedStart = INT_MIN;
2176  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2177  II, this);
2178  DEBUG({
2179  dbgs() << "Inst (" << SU->NodeNum << ") ";
2180  SU->getInstr()->dump();
2181  dbgs() << "\n";
2182  });
2183  DEBUG({
2184  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2185  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2186  });
2187 
2188  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2189  SchedStart > LateStart)
2190  scheduleFound = false;
2191  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2192  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2193  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2194  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2195  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2196  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2197  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2198  SchedEnd =
2199  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2200  // When scheduling a Phi it is better to start at the late cycle and go
2201  // backwards. The default order may insert the Phi too far away from
2202  // its first dependence.
2203  if (SU->getInstr()->isPHI())
2204  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2205  else
2206  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2207  } else {
2208  int FirstCycle = Schedule.getFirstCycle();
2209  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2210  FirstCycle + getASAP(SU) + II - 1, II);
2211  }
2212  // Even if we find a schedule, make sure the schedule doesn't exceed the
2213  // allowable number of stages. We keep trying if this happens.
2214  if (scheduleFound)
2215  if (SwpMaxStages > -1 &&
2216  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2217  scheduleFound = false;
2218 
2219  DEBUG({
2220  if (!scheduleFound)
2221  dbgs() << "\tCan't schedule\n";
2222  });
2223  } while (++NI != NE && scheduleFound);
2224 
2225  // If a schedule is found, check if it is a valid schedule too.
2226  if (scheduleFound)
2227  scheduleFound = Schedule.isValidSchedule(this);
2228  }
2229 
2230  DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2231 
2232  if (scheduleFound)
2233  Schedule.finalizeSchedule(this);
2234  else
2235  Schedule.reset();
2236 
2237  return scheduleFound && Schedule.getMaxStageCount() > 0;
2238 }
2239 
2240 /// Given a schedule for the loop, generate a new version of the loop,
2241 /// and replace the old version. This function generates a prolog
2242 /// that contains the initial iterations in the pipeline, and kernel
2243 /// loop, and the epilogue that contains the code for the final
2244 /// iterations.
2245 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2246  // Create a new basic block for the kernel and add it to the CFG.
2247  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2248 
2249  unsigned MaxStageCount = Schedule.getMaxStageCount();
2250 
2251  // Remember the registers that are used in different stages. The index is
2252  // the iteration, or stage, that the instruction is scheduled in. This is
2253  // a map between register names in the orignal block and the names created
2254  // in each stage of the pipelined loop.
2255  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2256  InstrMapTy InstrMap;
2257 
2259  // Generate the prolog instructions that set up the pipeline.
2260  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2261  MF.insert(BB->getIterator(), KernelBB);
2262 
2263  // Rearrange the instructions to generate the new, pipelined loop,
2264  // and update register names as needed.
2265  for (int Cycle = Schedule.getFirstCycle(),
2266  LastCycle = Schedule.getFinalCycle();
2267  Cycle <= LastCycle; ++Cycle) {
2268  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2269  // This inner loop schedules each instruction in the cycle.
2270  for (SUnit *CI : CycleInstrs) {
2271  if (CI->getInstr()->isPHI())
2272  continue;
2273  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2274  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2275  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2276  KernelBB->push_back(NewMI);
2277  InstrMap[NewMI] = CI->getInstr();
2278  }
2279  }
2280 
2281  // Copy any terminator instructions to the new kernel, and update
2282  // names as needed.
2283  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2284  E = BB->instr_end();
2285  I != E; ++I) {
2286  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2287  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2288  KernelBB->push_back(NewMI);
2289  InstrMap[NewMI] = &*I;
2290  }
2291 
2292  KernelBB->transferSuccessors(BB);
2293  KernelBB->replaceSuccessor(BB, KernelBB);
2294 
2295  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2296  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2297  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2298  InstrMap, MaxStageCount, MaxStageCount, false);
2299 
2300  DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2301 
2303  // Generate the epilog instructions to complete the pipeline.
2304  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2305  PrologBBs);
2306 
2307  // We need this step because the register allocation doesn't handle some
2308  // situations well, so we insert copies to help out.
2309  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2310 
2311  // Remove dead instructions due to loop induction variables.
2312  removeDeadInstructions(KernelBB, EpilogBBs);
2313 
2314  // Add branches between prolog and epilog blocks.
2315  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2316 
2317  // Remove the original loop since it's no longer referenced.
2318  BB->clear();
2319  BB->eraseFromParent();
2320 
2321  delete[] VRMap;
2322 }
2323 
2324 /// Generate the pipeline prolog code.
2325 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2326  MachineBasicBlock *KernelBB,
2327  ValueMapTy *VRMap,
2328  MBBVectorTy &PrologBBs) {
2329  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2330  assert(PreheaderBB != nullptr &&
2331  "Need to add code to handle loops w/o preheader");
2332  MachineBasicBlock *PredBB = PreheaderBB;
2333  InstrMapTy InstrMap;
2334 
2335  // Generate a basic block for each stage, not including the last stage,
2336  // which will be generated in the kernel. Each basic block may contain
2337  // instructions from multiple stages/iterations.
2338  for (unsigned i = 0; i < LastStage; ++i) {
2339  // Create and insert the prolog basic block prior to the original loop
2340  // basic block. The original loop is removed later.
2341  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2342  PrologBBs.push_back(NewBB);
2343  MF.insert(BB->getIterator(), NewBB);
2344  NewBB->transferSuccessors(PredBB);
2345  PredBB->addSuccessor(NewBB);
2346  PredBB = NewBB;
2347 
2348  // Generate instructions for each appropriate stage. Process instructions
2349  // in original program order.
2350  for (int StageNum = i; StageNum >= 0; --StageNum) {
2351  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2352  BBE = BB->getFirstTerminator();
2353  BBI != BBE; ++BBI) {
2354  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2355  if (BBI->isPHI())
2356  continue;
2357  MachineInstr *NewMI =
2358  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2359  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2360  VRMap);
2361  NewBB->push_back(NewMI);
2362  InstrMap[NewMI] = &*BBI;
2363  }
2364  }
2365  }
2366  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2367  DEBUG({
2368  dbgs() << "prolog:\n";
2369  NewBB->dump();
2370  });
2371  }
2372 
2373  PredBB->replaceSuccessor(BB, KernelBB);
2374 
2375  // Check if we need to remove the branch from the preheader to the original
2376  // loop, and replace it with a branch to the new loop.
2377  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2378  if (numBranches) {
2380  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2381  }
2382 }
2383 
2384 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2385 /// that were started in either the prolog or the kernel. We create a basic
2386 /// block for each stage that needs to complete.
2387 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2388  MachineBasicBlock *KernelBB,
2389  ValueMapTy *VRMap,
2390  MBBVectorTy &EpilogBBs,
2391  MBBVectorTy &PrologBBs) {
2392  // We need to change the branch from the kernel to the first epilog block, so
2393  // this call to analyze branch uses the kernel rather than the original BB.
2394  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2396  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2397  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2398  if (checkBranch)
2399  return;
2400 
2401  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2402  if (*LoopExitI == KernelBB)
2403  ++LoopExitI;
2404  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2405  MachineBasicBlock *LoopExitBB = *LoopExitI;
2406 
2407  MachineBasicBlock *PredBB = KernelBB;
2408  MachineBasicBlock *EpilogStart = LoopExitBB;
2409  InstrMapTy InstrMap;
2410 
2411  // Generate a basic block for each stage, not including the last stage,
2412  // which was generated for the kernel. Each basic block may contain
2413  // instructions from multiple stages/iterations.
2414  int EpilogStage = LastStage + 1;
2415  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2417  EpilogBBs.push_back(NewBB);
2418  MF.insert(BB->getIterator(), NewBB);
2419 
2420  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2421  NewBB->addSuccessor(LoopExitBB);
2422 
2423  if (EpilogStart == LoopExitBB)
2424  EpilogStart = NewBB;
2425 
2426  // Add instructions to the epilog depending on the current block.
2427  // Process instructions in original program order.
2428  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2429  for (auto &BBI : *BB) {
2430  if (BBI.isPHI())
2431  continue;
2432  MachineInstr *In = &BBI;
2433  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2434  MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
2435  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2436  NewBB->push_back(NewMI);
2437  InstrMap[NewMI] = In;
2438  }
2439  }
2440  }
2441  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2442  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2443  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2444  InstrMap, LastStage, EpilogStage, i == 1);
2445  PredBB = NewBB;
2446 
2447  DEBUG({
2448  dbgs() << "epilog:\n";
2449  NewBB->dump();
2450  });
2451  }
2452 
2453  // Fix any Phi nodes in the loop exit block.
2454  for (MachineInstr &MI : *LoopExitBB) {
2455  if (!MI.isPHI())
2456  break;
2457  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2458  MachineOperand &MO = MI.getOperand(i);
2459  if (MO.getMBB() == BB)
2460  MO.setMBB(PredBB);
2461  }
2462  }
2463 
2464  // Create a branch to the new epilog from the kernel.
2465  // Remove the original branch and add a new branch to the epilog.
2466  TII->removeBranch(*KernelBB);
2467  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2468  // Add a branch to the loop exit.
2469  if (EpilogBBs.size() > 0) {
2470  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2472  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2473  }
2474 }
2475 
2476 /// Replace all uses of FromReg that appear outside the specified
2477 /// basic block with ToReg.
2478 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2479  MachineBasicBlock *MBB,
2480  MachineRegisterInfo &MRI,
2481  LiveIntervals &LIS) {
2482  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2483  E = MRI.use_end();
2484  I != E;) {
2485  MachineOperand &O = *I;
2486  ++I;
2487  if (O.getParent()->getParent() != MBB)
2488  O.setReg(ToReg);
2489  }
2490  if (!LIS.hasInterval(ToReg))
2491  LIS.createEmptyInterval(ToReg);
2492 }
2493 
2494 /// Return true if the register has a use that occurs outside the
2495 /// specified loop.
2496 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2497  MachineRegisterInfo &MRI) {
2499  E = MRI.use_end();
2500  I != E; ++I)
2501  if (I->getParent()->getParent() != BB)
2502  return true;
2503  return false;
2504 }
2505 
2506 /// Generate Phis for the specific block in the generated pipelined code.
2507 /// This function looks at the Phis from the original code to guide the
2508 /// creation of new Phis.
2509 void SwingSchedulerDAG::generateExistingPhis(
2511  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2512  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2513  bool IsLast) {
2514  // Compute the stage number for the initial value of the Phi, which
2515  // comes from the prolog. The prolog to use depends on to which kernel/
2516  // epilog that we're adding the Phi.
2517  unsigned PrologStage = 0;
2518  unsigned PrevStage = 0;
2519  bool InKernel = (LastStageNum == CurStageNum);
2520  if (InKernel) {
2521  PrologStage = LastStageNum - 1;
2522  PrevStage = CurStageNum;
2523  } else {
2524  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2525  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2526  }
2527 
2528  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2529  BBE = BB->getFirstNonPHI();
2530  BBI != BBE; ++BBI) {
2531  unsigned Def = BBI->getOperand(0).getReg();
2532 
2533  unsigned InitVal = 0;
2534  unsigned LoopVal = 0;
2535  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2536 
2537  unsigned PhiOp1 = 0;
2538  // The Phi value from the loop body typically is defined in the loop, but
2539  // not always. So, we need to check if the value is defined in the loop.
2540  unsigned PhiOp2 = LoopVal;
2541  if (VRMap[LastStageNum].count(LoopVal))
2542  PhiOp2 = VRMap[LastStageNum][LoopVal];
2543 
2544  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2545  int LoopValStage =
2546  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2547  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2548  if (NumStages == 0) {
2549  // We don't need to generate a Phi anymore, but we need to rename any uses
2550  // of the Phi value.
2551  unsigned NewReg = VRMap[PrevStage][LoopVal];
2552  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2553  Def, NewReg);
2554  if (VRMap[CurStageNum].count(LoopVal))
2555  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2556  }
2557  // Adjust the number of Phis needed depending on the number of prologs left,
2558  // and the distance from where the Phi is first scheduled.
2559  unsigned NumPhis = NumStages;
2560  if (!InKernel && (int)PrologStage < LoopValStage)
2561  // The NumPhis is the maximum number of new Phis needed during the steady
2562  // state. If the Phi has not been scheduled in current prolog, then we
2563  // need to generate less Phis.
2564  NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
2565  // The number of Phis cannot exceed the number of prolog stages. Each
2566  // stage can potentially define two values.
2567  NumPhis = std::min(NumPhis, PrologStage + 2);
2568 
2569  unsigned NewReg = 0;
2570 
2571  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2572  // In the epilog, we may need to look back one stage to get the correct
2573  // Phi name because the epilog and prolog blocks execute the same stage.
2574  // The correct name is from the previous block only when the Phi has
2575  // been completely scheduled prior to the epilog, and Phi value is not
2576  // needed in multiple stages.
2577  int StageDiff = 0;
2578  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2579  NumPhis == 1)
2580  StageDiff = 1;
2581  // Adjust the computations below when the phi and the loop definition
2582  // are scheduled in different stages.
2583  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2584  StageDiff = StageScheduled - LoopValStage;
2585  for (unsigned np = 0; np < NumPhis; ++np) {
2586  // If the Phi hasn't been scheduled, then use the initial Phi operand
2587  // value. Otherwise, use the scheduled version of the instruction. This
2588  // is a little complicated when a Phi references another Phi.
2589  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2590  PhiOp1 = InitVal;
2591  // Check if the Phi has already been scheduled in a prolog stage.
2592  else if (PrologStage >= AccessStage + StageDiff + np &&
2593  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2594  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2595  // Check if the Phi has already been scheduled, but the loop intruction
2596  // is either another Phi, or doesn't occur in the loop.
2597  else if (PrologStage >= AccessStage + StageDiff + np) {
2598  // If the Phi references another Phi, we need to examine the other
2599  // Phi to get the correct value.
2600  PhiOp1 = LoopVal;
2601  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2602  int Indirects = 1;
2603  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2604  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2605  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2606  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2607  else
2608  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2609  InstOp1 = MRI.getVRegDef(PhiOp1);
2610  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2611  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2612  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2613  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2614  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2615  break;
2616  }
2617  ++Indirects;
2618  }
2619  } else
2620  PhiOp1 = InitVal;
2621  // If this references a generated Phi in the kernel, get the Phi operand
2622  // from the incoming block.
2623  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2624  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2625  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2626 
2627  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2628  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2629  // In the epilog, a map lookup is needed to get the value from the kernel,
2630  // or previous epilog block. How is does this depends on if the
2631  // instruction is scheduled in the previous block.
2632  if (!InKernel) {
2633  int StageDiffAdj = 0;
2634  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2635  StageDiffAdj = StageScheduled - LoopValStage;
2636  // Use the loop value defined in the kernel, unless the kernel
2637  // contains the last definition of the Phi.
2638  if (np == 0 && PrevStage == LastStageNum &&
2639  (StageScheduled != 0 || LoopValStage != 0) &&
2640  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2641  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2642  // Use the value defined by the Phi. We add one because we switch
2643  // from looking at the loop value to the Phi definition.
2644  else if (np > 0 && PrevStage == LastStageNum &&
2645  VRMap[PrevStage - np + 1].count(Def))
2646  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2647  // Use the loop value defined in the kernel.
2648  else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
2649  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2650  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2651  // Use the value defined by the Phi, unless we're generating the first
2652  // epilog and the Phi refers to a Phi in a different stage.
2653  else if (VRMap[PrevStage - np].count(Def) &&
2654  (!LoopDefIsPhi || PrevStage != LastStageNum))
2655  PhiOp2 = VRMap[PrevStage - np][Def];
2656  }
2657 
2658  // Check if we can reuse an existing Phi. This occurs when a Phi
2659  // references another Phi, and the other Phi is scheduled in an
2660  // earlier stage. We can try to reuse an existing Phi up until the last
2661  // stage of the current Phi.
2662  if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
2663  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2664  int StageDiff = (StageScheduled - LoopValStage);
2665  LVNumStages -= StageDiff;
2666  if (LVNumStages > (int)np) {
2667  NewReg = PhiOp2;
2668  unsigned ReuseStage = CurStageNum;
2669  if (Schedule.isLoopCarried(this, *PhiInst))
2670  ReuseStage -= LVNumStages;
2671  // Check if the Phi to reuse has been generated yet. If not, then
2672  // there is nothing to reuse.
2673  if (VRMap[ReuseStage].count(LoopVal)) {
2674  NewReg = VRMap[ReuseStage][LoopVal];
2675 
2676  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2677  &*BBI, Def, NewReg);
2678  // Update the map with the new Phi name.
2679  VRMap[CurStageNum - np][Def] = NewReg;
2680  PhiOp2 = NewReg;
2681  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2682  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2683 
2684  if (IsLast && np == NumPhis - 1)
2685  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2686  continue;
2687  }
2688  } else if (InKernel && StageDiff > 0 &&
2689  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2690  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2691  }
2692 
2693  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2694  NewReg = MRI.createVirtualRegister(RC);
2695 
2696  MachineInstrBuilder NewPhi =
2697  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2698  TII->get(TargetOpcode::PHI), NewReg);
2699  NewPhi.addReg(PhiOp1).addMBB(BB1);
2700  NewPhi.addReg(PhiOp2).addMBB(BB2);
2701  if (np == 0)
2702  InstrMap[NewPhi] = &*BBI;
2703 
2704  // We define the Phis after creating the new pipelined code, so
2705  // we need to rename the Phi values in scheduled instructions.
2706 
2707  unsigned PrevReg = 0;
2708  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2709  PrevReg = VRMap[PrevStage - np][LoopVal];
2710  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2711  Def, NewReg, PrevReg);
2712  // If the Phi has been scheduled, use the new name for rewriting.
2713  if (VRMap[CurStageNum - np].count(Def)) {
2714  unsigned R = VRMap[CurStageNum - np][Def];
2715  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2716  R, NewReg);
2717  }
2718 
2719  // Check if we need to rename any uses that occurs after the loop. The
2720  // register to replace depends on whether the Phi is scheduled in the
2721  // epilog.
2722  if (IsLast && np == NumPhis - 1)
2723  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2724 
2725  // In the kernel, a dependent Phi uses the value from this Phi.
2726  if (InKernel)
2727  PhiOp2 = NewReg;
2728 
2729  // Update the map with the new Phi name.
2730  VRMap[CurStageNum - np][Def] = NewReg;
2731  }
2732 
2733  while (NumPhis++ < NumStages) {
2734  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2735  &*BBI, Def, NewReg, 0);
2736  }
2737 
2738  // Check if we need to rename a Phi that has been eliminated due to
2739  // scheduling.
2740  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2741  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2742  }
2743 }
2744 
2745 /// Generate Phis for the specified block in the generated pipelined code.
2746 /// These are new Phis needed because the definition is scheduled after the
2747 /// use in the pipelened sequence.
2748 void SwingSchedulerDAG::generatePhis(
2750  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2751  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2752  bool IsLast) {
2753  // Compute the stage number that contains the initial Phi value, and
2754  // the Phi from the previous stage.
2755  unsigned PrologStage = 0;
2756  unsigned PrevStage = 0;
2757  unsigned StageDiff = CurStageNum - LastStageNum;
2758  bool InKernel = (StageDiff == 0);
2759  if (InKernel) {
2760  PrologStage = LastStageNum - 1;
2761  PrevStage = CurStageNum;
2762  } else {
2763  PrologStage = LastStageNum - StageDiff;
2764  PrevStage = LastStageNum + StageDiff - 1;
2765  }
2766 
2767  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2768  BBE = BB->instr_end();
2769  BBI != BBE; ++BBI) {
2770  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2771  MachineOperand &MO = BBI->getOperand(i);
2772  if (!MO.isReg() || !MO.isDef() ||
2774  continue;
2775 
2776  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2777  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2778  unsigned Def = MO.getReg();
2779  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2780  // An instruction scheduled in stage 0 and is used after the loop
2781  // requires a phi in the epilog for the last definition from either
2782  // the kernel or prolog.
2783  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2784  hasUseAfterLoop(Def, BB, MRI))
2785  NumPhis = 1;
2786  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2787  continue;
2788 
2789  unsigned PhiOp2 = VRMap[PrevStage][Def];
2790  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2791  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2792  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2793  // The number of Phis can't exceed the number of prolog stages. The
2794  // prolog stage number is zero based.
2795  if (NumPhis > PrologStage + 1 - StageScheduled)
2796  NumPhis = PrologStage + 1 - StageScheduled;
2797  for (unsigned np = 0; np < NumPhis; ++np) {
2798  unsigned PhiOp1 = VRMap[PrologStage][Def];
2799  if (np <= PrologStage)
2800  PhiOp1 = VRMap[PrologStage - np][Def];
2801  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2802  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2803  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2804  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2805  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2806  }
2807  if (!InKernel)
2808  PhiOp2 = VRMap[PrevStage - np][Def];
2809 
2810  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2811  unsigned NewReg = MRI.createVirtualRegister(RC);
2812 
2813  MachineInstrBuilder NewPhi =
2814  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2815  TII->get(TargetOpcode::PHI), NewReg);
2816  NewPhi.addReg(PhiOp1).addMBB(BB1);
2817  NewPhi.addReg(PhiOp2).addMBB(BB2);
2818  if (np == 0)
2819  InstrMap[NewPhi] = &*BBI;
2820 
2821  // Rewrite uses and update the map. The actions depend upon whether
2822  // we generating code for the kernel or epilog blocks.
2823  if (InKernel) {
2824  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2825  &*BBI, PhiOp1, NewReg);
2826  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2827  &*BBI, PhiOp2, NewReg);
2828 
2829  PhiOp2 = NewReg;
2830  VRMap[PrevStage - np - 1][Def] = NewReg;
2831  } else {
2832  VRMap[CurStageNum - np][Def] = NewReg;
2833  if (np == NumPhis - 1)
2834  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2835  &*BBI, Def, NewReg);
2836  }
2837  if (IsLast && np == NumPhis - 1)
2838  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2839  }
2840  }
2841  }
2842 }
2843 
2844 /// Remove instructions that generate values with no uses.
2845 /// Typically, these are induction variable operations that generate values
2846 /// used in the loop itself. A dead instruction has a definition with
2847 /// no uses, or uses that occur in the original loop only.
2848 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2849  MBBVectorTy &EpilogBBs) {
2850  // For each epilog block, check that the value defined by each instruction
2851  // is used. If not, delete it.
2852  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2853  MBE = EpilogBBs.rend();
2854  MBB != MBE; ++MBB)
2855  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2856  ME = (*MBB)->instr_rend();
2857  MI != ME;) {
2858  // From DeadMachineInstructionElem. Don't delete inline assembly.
2859  if (MI->isInlineAsm()) {
2860  ++MI;
2861  continue;
2862  }
2863  bool SawStore = false;
2864  // Check if it's safe to remove the instruction due to side effects.
2865  // We can, and want to, remove Phis here.
2866  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2867  ++MI;
2868  continue;
2869  }
2870  bool used = true;
2871  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2872  MOE = MI->operands_end();
2873  MOI != MOE; ++MOI) {
2874  if (!MOI->isReg() || !MOI->isDef())
2875  continue;
2876  unsigned reg = MOI->getReg();
2877  unsigned realUses = 0;
2878  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2879  EI = MRI.use_end();
2880  UI != EI; ++UI) {
2881  // Check if there are any uses that occur only in the original
2882  // loop. If so, that's not a real use.
2883  if (UI->getParent()->getParent() != BB) {
2884  realUses++;
2885  used = true;
2886  break;
2887  }
2888  }
2889  if (realUses > 0)
2890  break;
2891  used = false;
2892  }
2893  if (!used) {
2894  MI++->eraseFromParent();
2895  continue;
2896  }
2897  ++MI;
2898  }
2899  // In the kernel block, check if we can remove a Phi that generates a value
2900  // used in an instruction removed in the epilog block.
2901  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2902  BBE = KernelBB->getFirstNonPHI();
2903  BBI != BBE;) {
2904  MachineInstr *MI = &*BBI;
2905  ++BBI;
2906  unsigned reg = MI->getOperand(0).getReg();
2907  if (MRI.use_begin(reg) == MRI.use_end()) {
2908  MI->eraseFromParent();
2909  }
2910  }
2911 }
2912 
2913 /// For loop carried definitions, we split the lifetime of a virtual register
2914 /// that has uses past the definition in the next iteration. A copy with a new
2915 /// virtual register is inserted before the definition, which helps with
2916 /// generating a better register assignment.
2917 ///
2918 /// v1 = phi(a, v2) v1 = phi(a, v2)
2919 /// v2 = phi(b, v3) v2 = phi(b, v3)
2920 /// v3 = .. v4 = copy v1
2921 /// .. = V1 v3 = ..
2922 /// .. = v4
2923 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2924  MBBVectorTy &EpilogBBs,
2925  SMSchedule &Schedule) {
2926  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2927  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2928  BBF = KernelBB->getFirstNonPHI();
2929  BBI != BBF; ++BBI) {
2930  unsigned Def = BBI->getOperand(0).getReg();
2931  // Check for any Phi definition that used as an operand of another Phi
2932  // in the same block.
2934  E = MRI.use_instr_end();
2935  I != E; ++I) {
2936  if (I->isPHI() && I->getParent() == KernelBB) {
2937  // Get the loop carried definition.
2938  unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
2939  if (!LCDef)
2940  continue;
2941  MachineInstr *MI = MRI.getVRegDef(LCDef);
2942  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2943  continue;
2944  // Search through the rest of the block looking for uses of the Phi
2945  // definition. If one occurs, then split the lifetime.
2946  unsigned SplitReg = 0;
2947  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2948  KernelBB->instr_end()))
2949  if (BBJ.readsRegister(Def)) {
2950  // We split the lifetime when we find the first use.
2951  if (SplitReg == 0) {
2952  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2953  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2954  TII->get(TargetOpcode::COPY), SplitReg)
2955  .addReg(Def);
2956  }
2957  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2958  }
2959  if (!SplitReg)
2960  continue;
2961  // Search through each of the epilog blocks for any uses to be renamed.
2962  for (auto &Epilog : EpilogBBs)
2963  for (auto &I : *Epilog)
2964  if (I.readsRegister(Def))
2965  I.substituteRegister(Def, SplitReg, 0, *TRI);
2966  break;
2967  }
2968  }
2969  }
2970 }
2971 
2972 /// Remove the incoming block from the Phis in a basic block.
2973 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2974  for (MachineInstr &MI : *BB) {
2975  if (!MI.isPHI())
2976  break;
2977  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2978  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2979  MI.RemoveOperand(i + 1);
2980  MI.RemoveOperand(i);
2981  break;
2982  }
2983  }
2984 }
2985 
2986 /// Create branches from each prolog basic block to the appropriate epilog
2987 /// block. These edges are needed if the loop ends before reaching the
2988 /// kernel.
2989 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2990  MachineBasicBlock *KernelBB,
2991  MBBVectorTy &EpilogBBs,
2992  SMSchedule &Schedule, ValueMapTy *VRMap) {
2993  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2994  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2995  MachineInstr *Cmp = Pass.LI.LoopCompare;
2996  MachineBasicBlock *LastPro = KernelBB;
2997  MachineBasicBlock *LastEpi = KernelBB;
2998 
2999  // Start from the blocks connected to the kernel and work "out"
3000  // to the first prolog and the last epilog blocks.
3002  unsigned MaxIter = PrologBBs.size() - 1;
3003  unsigned LC = UINT_MAX;
3004  unsigned LCMin = UINT_MAX;
3005  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3006  // Add branches to the prolog that go to the corresponding
3007  // epilog, and the fall-thru prolog/kernel block.
3008  MachineBasicBlock *Prolog = PrologBBs[j];
3009  MachineBasicBlock *Epilog = EpilogBBs[i];
3010  // We've executed one iteration, so decrement the loop count and check for
3011  // the loop end.
3013  // Check if the LOOP0 has already been removed. If so, then there is no need
3014  // to reduce the trip count.
3015  if (LC != 0)
3016  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3017  MaxIter);
3018 
3019  // Record the value of the first trip count, which is used to determine if
3020  // branches and blocks can be removed for constant trip counts.
3021  if (LCMin == UINT_MAX)
3022  LCMin = LC;
3023 
3024  unsigned numAdded = 0;
3026  Prolog->addSuccessor(Epilog);
3027  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3028  } else if (j >= LCMin) {
3029  Prolog->addSuccessor(Epilog);
3030  Prolog->removeSuccessor(LastPro);
3031  LastEpi->removeSuccessor(Epilog);
3032  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3033  removePhis(Epilog, LastEpi);
3034  // Remove the blocks that are no longer referenced.
3035  if (LastPro != LastEpi) {
3036  LastEpi->clear();
3037  LastEpi->eraseFromParent();
3038  }
3039  LastPro->clear();
3040  LastPro->eraseFromParent();
3041  } else {
3042  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3043  removePhis(Epilog, Prolog);
3044  }
3045  LastPro = Prolog;
3046  LastEpi = Epilog;
3048  E = Prolog->instr_rend();
3049  I != E && numAdded > 0; ++I, --numAdded)
3050  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3051  }
3052 }
3053 
3054 /// Return true if we can compute the amount the instruction changes
3055 /// during each iteration. Set Delta to the amount of the change.
3056 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3057  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3058  unsigned BaseReg;
3059  int64_t Offset;
3060  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
3061  return false;
3062 
3063  MachineRegisterInfo &MRI = MF.getRegInfo();
3064  // Check if there is a Phi. If so, get the definition in the loop.
3065  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3066  if (BaseDef && BaseDef->isPHI()) {
3067  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3068  BaseDef = MRI.getVRegDef(BaseReg);
3069  }
3070  if (!BaseDef)
3071  return false;
3072 
3073  int D = 0;
3074  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3075  return false;
3076 
3077  Delta = D;
3078  return true;
3079 }
3080 
3081 /// Update the memory operand with a new offset when the pipeliner
3082 /// generates a new copy of the instruction that refers to a
3083 /// different memory location.
3084 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3085  MachineInstr &OldMI, unsigned Num) {
3086  if (Num == 0)
3087  return;
3088  // If the instruction has memory operands, then adjust the offset
3089  // when the instruction appears in different stages.
3090  unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
3091  if (NumRefs == 0)
3092  return;
3093  MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
3094  unsigned Refs = 0;
3095  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3096  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3097  (!MMO->getValue())) {
3098  NewMemRefs[Refs++] = MMO;
3099  continue;
3100  }
3101  unsigned Delta;
3102  if (computeDelta(OldMI, Delta)) {
3103  int64_t AdjOffset = Delta * Num;
3104  NewMemRefs[Refs++] =
3105  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
3106  } else
3107  NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
3108  }
3109  NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
3110 }
3111 
3112 /// Clone the instruction for the new pipelined loop and update the
3113 /// memory operands, if needed.
3114 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3115  unsigned CurStageNum,
3116  unsigned InstStageNum) {
3117  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3118  // Check for tied operands in inline asm instructions. This should be handled
3119  // elsewhere, but I'm not sure of the best solution.
3120  if (OldMI->isInlineAsm())
3121  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3122  const auto &MO = OldMI->getOperand(i);
3123  if (MO.isReg() && MO.isUse())
3124  break;
3125  unsigned UseIdx;
3126  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3127  NewMI->tieOperands(i, UseIdx);
3128  }
3129  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3130  return NewMI;
3131 }
3132 
3133 /// Clone the instruction for the new pipelined loop. If needed, this
3134 /// function updates the instruction using the values saved in the
3135 /// InstrChanges structure.
3136 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3137  unsigned CurStageNum,
3138  unsigned InstStageNum,
3139  SMSchedule &Schedule) {
3140  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3142  InstrChanges.find(getSUnit(OldMI));
3143  if (It != InstrChanges.end()) {
3144  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3145  unsigned BasePos, OffsetPos;
3146  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3147  return nullptr;
3148  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3149  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
3150  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3151  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3152  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3153  }
3154  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3155  return NewMI;
3156 }
3157 
3158 /// Update the machine instruction with new virtual registers. This
3159 /// function may change the defintions and/or uses.
3160 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3161  unsigned CurStageNum,
3162  unsigned InstrStageNum,
3163  SMSchedule &Schedule,
3164  ValueMapTy *VRMap) {
3165  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3166  MachineOperand &MO = NewMI->getOperand(i);
3168  continue;
3169  unsigned reg = MO.getReg();
3170  if (MO.isDef()) {
3171  // Create a new virtual register for the definition.
3172  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3173  unsigned NewReg = MRI.createVirtualRegister(RC);
3174  MO.setReg(NewReg);
3175  VRMap[CurStageNum][reg] = NewReg;
3176  if (LastDef)
3177  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3178  } else if (MO.isUse()) {
3179  MachineInstr *Def = MRI.getVRegDef(reg);
3180  // Compute the stage that contains the last definition for instruction.
3181  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3182  unsigned StageNum = CurStageNum;
3183  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3184  // Compute the difference in stages between the defintion and the use.
3185  unsigned StageDiff = (InstrStageNum - DefStageNum);
3186  // Make an adjustment to get the last definition.
3187  StageNum -= StageDiff;
3188  }
3189  if (VRMap[StageNum].count(reg))
3190  MO.setReg(VRMap[StageNum][reg]);
3191  }
3192  }
3193 }
3194 
3195 /// Return the instruction in the loop that defines the register.
3196 /// If the definition is a Phi, then follow the Phi operand to
3197 /// the instruction in the loop.
3198 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3200  MachineInstr *Def = MRI.getVRegDef(Reg);
3201  while (Def->isPHI()) {
3202  if (!Visited.insert(Def).second)
3203  break;
3204  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3205  if (Def->getOperand(i + 1).getMBB() == BB) {
3206  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3207  break;
3208  }
3209  }
3210  return Def;
3211 }
3212 
3213 /// Return the new name for the value from the previous stage.
3214 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3215  unsigned LoopVal, unsigned LoopStage,
3216  ValueMapTy *VRMap,
3217  MachineBasicBlock *BB) {
3218  unsigned PrevVal = 0;
3219  if (StageNum > PhiStage) {
3220  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3221  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3222  // The name is defined in the previous stage.
3223  PrevVal = VRMap[StageNum - 1][LoopVal];
3224  else if (VRMap[StageNum].count(LoopVal))
3225  // The previous name is defined in the current stage when the instruction
3226  // order is swapped.
3227  PrevVal = VRMap[StageNum][LoopVal];
3228  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3229  // The loop value hasn't yet been scheduled.
3230  PrevVal = LoopVal;
3231  else if (StageNum == PhiStage + 1)
3232  // The loop value is another phi, which has not been scheduled.
3233  PrevVal = getInitPhiReg(*LoopInst, BB);
3234  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3235  // The loop value is another phi, which has been scheduled.
3236  PrevVal =
3237  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3238  LoopStage, VRMap, BB);
3239  }
3240  return PrevVal;
3241 }
3242 
3243 /// Rewrite the Phi values in the specified block to use the mappings
3244 /// from the initial operand. Once the Phi is scheduled, we switch
3245 /// to using the loop value instead of the Phi value, so those names
3246 /// do not need to be rewritten.
3247 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3248  unsigned StageNum,
3249  SMSchedule &Schedule,
3250  ValueMapTy *VRMap,
3251  InstrMapTy &InstrMap) {
3252  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
3253  BBE = BB->getFirstNonPHI();
3254  BBI != BBE; ++BBI) {
3255  unsigned InitVal = 0;
3256  unsigned LoopVal = 0;
3257  getPhiRegs(*BBI, BB, InitVal, LoopVal);
3258  unsigned PhiDef = BBI->getOperand(0).getReg();
3259 
3260  unsigned PhiStage =
3261  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3262  unsigned LoopStage =
3263  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3264  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3265  if (NumPhis > StageNum)
3266  NumPhis = StageNum;
3267  for (unsigned np = 0; np <= NumPhis; ++np) {
3268  unsigned NewVal =
3269  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3270  if (!NewVal)
3271  NewVal = InitVal;
3272  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
3273  PhiDef, NewVal);
3274  }
3275  }
3276 }
3277 
3278 /// Rewrite a previously scheduled instruction to use the register value
3279 /// from the new instruction. Make sure the instruction occurs in the
3280 /// basic block, and we don't change the uses in the new instruction.
3281 void SwingSchedulerDAG::rewriteScheduledInstr(
3282  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3283  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3284  unsigned NewReg, unsigned PrevReg) {
3285  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3286  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3287  // Rewrite uses that have been scheduled already to use the new
3288  // Phi register.
3289  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3290  EI = MRI.use_end();
3291  UI != EI;) {
3292  MachineOperand &UseOp = *UI;
3293  MachineInstr *UseMI = UseOp.getParent();
3294  ++UI;
3295  if (UseMI->getParent() != BB)
3296  continue;
3297  if (UseMI->isPHI()) {
3298  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3299  continue;
3300  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3301  continue;
3302  }
3303  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3304  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3305  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3306  int StageSched = Schedule.stageScheduled(OrigMISU);
3307  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3308  unsigned ReplaceReg = 0;
3309  // This is the stage for the scheduled instruction.
3310  if (StagePhi == StageSched && Phi->isPHI()) {
3311  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3312  if (PrevReg && InProlog)
3313  ReplaceReg = PrevReg;
3314  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3315  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3316  ReplaceReg = PrevReg;
3317  else
3318  ReplaceReg = NewReg;
3319  }
3320  // The scheduled instruction occurs before the scheduled Phi, and the
3321  // Phi is not loop carried.
3322  if (!InProlog && StagePhi + 1 == StageSched &&
3323  !Schedule.isLoopCarried(this, *Phi))
3324  ReplaceReg = NewReg;
3325  if (StagePhi > StageSched && Phi->isPHI())
3326  ReplaceReg = NewReg;
3327  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3328  ReplaceReg = NewReg;
3329  if (ReplaceReg) {
3330  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3331  UseOp.setReg(ReplaceReg);
3332  }
3333  }
3334 }
3335 
3336 /// Check if we can change the instruction to use an offset value from the
3337 /// previous iteration. If so, return true and set the base and offset values
3338 /// so that we can rewrite the load, if necessary.
3339 /// v1 = Phi(v0, v3)
3340 /// v2 = load v1, 0
3341 /// v3 = post_store v1, 4, x
3342 /// This function enables the load to be rewritten as v2 = load v3, 4.
3343 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3344  unsigned &BasePos,
3345  unsigned &OffsetPos,
3346  unsigned &NewBase,
3347  int64_t &Offset) {
3348  // Get the load instruction.
3349  if (TII->isPostIncrement(*MI))
3350  return false;
3351  unsigned BasePosLd, OffsetPosLd;
3352  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3353  return false;
3354  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3355 
3356  // Look for the Phi instruction.
3357  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3358  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3359  if (!Phi || !Phi->isPHI())
3360  return false;
3361  // Get the register defined in the loop block.
3362  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3363  if (!PrevReg)
3364  return false;
3365 
3366  // Check for the post-increment load/store instruction.
3367  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3368  if (!PrevDef || PrevDef == MI)
3369  return false;
3370 
3371  if (!TII->isPostIncrement(*PrevDef))
3372  return false;
3373 
3374  unsigned BasePos1 = 0, OffsetPos1 = 0;
3375  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3376  return false;
3377 
3378  // Make sure offset values are both positive or both negative.
3379  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3380  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3381  if ((LoadOffset >= 0) != (StoreOffset >= 0))
3382  return false;
3383 
3384  // Set the return value once we determine that we return true.
3385  BasePos = BasePosLd;
3386  OffsetPos = OffsetPosLd;
3387  NewBase = PrevReg;
3388  Offset = StoreOffset;
3389  return true;
3390 }
3391 
3392 /// Apply changes to the instruction if needed. The changes are need
3393 /// to improve the scheduling and depend up on the final schedule.
3394 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3395  SMSchedule &Schedule) {
3396  SUnit *SU = getSUnit(MI);
3398  InstrChanges.find(SU);
3399  if (It != InstrChanges.end()) {
3400  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3401  unsigned BasePos, OffsetPos;
3402  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3403  return;
3404  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3405  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3406  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3407  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3408  int BaseStageNum = Schedule.stageScheduled(SU);
3409  int BaseCycleNum = Schedule.cycleScheduled(SU);
3410  if (BaseStageNum < DefStageNum) {
3411  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3412  int OffsetDiff = DefStageNum - BaseStageNum;
3413  if (DefCycleNum < BaseCycleNum) {
3414  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3415  if (OffsetDiff > 0)
3416  --OffsetDiff;
3417  }
3418  int64_t NewOffset =
3419  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3420  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3421  SU->setInstr(NewMI);
3422  MISUnitMap[NewMI] = SU;
3423  NewMIs.insert(NewMI);
3424  }
3425  }
3426 }
3427 
3428 /// Return true for an order dependence that is loop carried potentially.
3429 /// An order dependence is loop carried if the destination defines a value
3430 /// that may be used by the source in a subsequent iteration.
3431 bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
3432  bool isSucc) {
3433  if (!isOrder(Source, Dep) || Dep.isArtificial())
3434  return false;
3435 
3436  if (!SwpPruneLoopCarried)
3437  return true;
3438 
3439  MachineInstr *SI = Source->getInstr();
3440  MachineInstr *DI = Dep.getSUnit()->getInstr();
3441  if (!isSucc)
3442  std::swap(SI, DI);
3443  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3444 
3445  // Assume ordered loads and stores may have a loop carried dependence.
3446  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3448  return true;
3449 
3450  // Only chain dependences between a load and store can be loop carried.
3451  if (!DI->mayStore() || !SI->mayLoad())
3452  return false;
3453 
3454  unsigned DeltaS, DeltaD;
3455  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3456  return true;
3457 
3458  unsigned BaseRegS, BaseRegD;
3459  int64_t OffsetS, OffsetD;
3460  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3461  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
3462  !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
3463  return true;
3464 
3465  if (BaseRegS != BaseRegD)
3466  return true;
3467 
3468  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3469  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3470 
3471  // This is the main test, which checks the offset values and the loop
3472  // increment value to determine if the accesses may be loop carried.
3473  if (OffsetS >= OffsetD)
3474  return OffsetS + AccessSizeS > DeltaS;
3475  else
3476  return OffsetD + AccessSizeD > DeltaD;
3477 
3478  return true;
3479 }
3480 
3481 void SwingSchedulerDAG::postprocessDAG() {
3482  for (auto &M : Mutations)
3483  M->apply(this);
3484 }
3485 
3486 /// Try to schedule the node at the specified StartCycle and continue
3487 /// until the node is schedule or the EndCycle is reached. This function
3488 /// returns true if the node is scheduled. This routine may search either
3489 /// forward or backward for a place to insert the instruction based upon
3490 /// the relative values of StartCycle and EndCycle.
3491 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3492  bool forward = true;
3493  if (StartCycle > EndCycle)
3494  forward = false;
3495 
3496  // The terminating condition depends on the direction.
3497  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3498  for (int curCycle = StartCycle; curCycle != termCycle;
3499  forward ? ++curCycle : --curCycle) {
3500 
3501  // Add the already scheduled instructions at the specified cycle to the DFA.
3502  Resources->clearResources();
3503  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3504  checkCycle <= LastCycle; checkCycle += II) {
3505  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3506 
3507  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3508  E = cycleInstrs.end();
3509  I != E; ++I) {
3510  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3511  continue;
3512  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3513  "These instructions have already been scheduled.");
3514  Resources->reserveResources(*(*I)->getInstr());
3515  }
3516  }
3517  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3518  Resources->canReserveResources(*SU->getInstr())) {
3519  DEBUG({
3520  dbgs() << "\tinsert at cycle " << curCycle << " ";
3521  SU->getInstr()->dump();
3522  });
3523 
3524  ScheduledInstrs[curCycle].push_back(SU);
3525  InstrToCycle.insert(std::make_pair(SU, curCycle));
3526  if (curCycle > LastCycle)
3527  LastCycle = curCycle;
3528  if (curCycle < FirstCycle)
3529  FirstCycle = curCycle;
3530  return true;
3531  }
3532  DEBUG({
3533  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3534  SU->getInstr()->dump();
3535  });
3536  }
3537  return false;
3538 }
3539 
3540 // Return the cycle of the earliest scheduled instruction in the chain.
3541 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3542  SmallPtrSet<SUnit *, 8> Visited;
3543  SmallVector<SDep, 8> Worklist;
3544  Worklist.push_back(Dep);
3545  int EarlyCycle = INT_MAX;
3546  while (!Worklist.empty()) {
3547  const SDep &Cur = Worklist.pop_back_val();
3548  SUnit *PrevSU = Cur.getSUnit();
3549  if (Visited.count(PrevSU))
3550  continue;
3551  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3552  if (it == InstrToCycle.end())
3553  continue;
3554  EarlyCycle = std::min(EarlyCycle, it->second);
3555  for (const auto &PI : PrevSU->Preds)
3556  if (SwingSchedulerDAG::isOrder(PrevSU, PI))
3557  Worklist.push_back(PI);
3558  Visited.insert(PrevSU);
3559  }
3560  return EarlyCycle;
3561 }
3562 
3563 // Return the cycle of the latest scheduled instruction in the chain.
3564 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3565  SmallPtrSet<SUnit *, 8> Visited;
3566  SmallVector<SDep, 8> Worklist;
3567  Worklist.push_back(Dep);
3568  int LateCycle = INT_MIN;
3569  while (!Worklist.empty()) {
3570  const SDep &Cur = Worklist.pop_back_val();
3571  SUnit *SuccSU = Cur.getSUnit();
3572  if (Visited.count(SuccSU))
3573  continue;
3574  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3575  if (it == InstrToCycle.end())
3576  continue;
3577  LateCycle = std::max(LateCycle, it->second);
3578  for (const auto &SI : SuccSU->Succs)
3579  if (SwingSchedulerDAG::isOrder(SuccSU, SI))
3580  Worklist.push_back(SI);
3581  Visited.insert(SuccSU);
3582  }
3583  return LateCycle;
3584 }
3585 
3586 /// If an instruction has a use that spans multiple iterations, then
3587 /// return true. These instructions are characterized by having a back-ege
3588 /// to a Phi, which contains a reference to another Phi.
3589 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3590  for (auto &P : SU->Preds)
3591  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3592  for (auto &S : P.getSUnit()->Succs)
3593  if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
3594  return P.getSUnit();
3595  return nullptr;
3596 }
3597 
3598 /// Compute the scheduling start slot for the instruction. The start slot
3599 /// depends on any predecessor or successor nodes scheduled already.
3600 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3601  int *MinEnd, int *MaxStart, int II,
3602  SwingSchedulerDAG *DAG) {
3603  // Iterate over each instruction that has been scheduled already. The start
3604  // slot computuation depends on whether the previously scheduled instruction
3605  // is a predecessor or successor of the specified instruction.
3606  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3607 
3608  // Iterate over each instruction in the current cycle.
3609  for (SUnit *I : getInstructions(cycle)) {
3610  // Because we're processing a DAG for the dependences, we recognize
3611  // the back-edge in recurrences by anti dependences.
3612  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3613  const SDep &Dep = SU->Preds[i];
3614  if (Dep.getSUnit() == I) {
3615  if (!DAG->isBackedge(SU, Dep)) {
3616  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3617  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3618  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3619  if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
3620  int End = earliestCycleInChain(Dep) + (II - 1);
3621  *MinEnd = std::min(*MinEnd, End);
3622  }
3623  } else {
3624  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3625  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3626  *MinLateStart = std::min(*MinLateStart, LateStart);
3627  }
3628  }
3629  // For instruction that requires multiple iterations, make sure that
3630  // the dependent instruction is not scheduled past the definition.
3631  SUnit *BE = multipleIterations(I, DAG);
3632  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3633  !SU->isPred(I))
3634  *MinLateStart = std::min(*MinLateStart, cycle);
3635  }
3636  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
3637  if (SU->Succs[i].getSUnit() == I) {
3638  const SDep &Dep = SU->Succs[i];
3639  if (!DAG->isBackedge(SU, Dep)) {
3640  int LateStart = cycle - DAG->getLatency(SU, Dep) +
3641  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3642  *MinLateStart = std::min(*MinLateStart, LateStart);
3643  if (DAG->isLoopCarriedOrder(SU, Dep)) {
3644  int Start = latestCycleInChain(Dep) + 1 - II;
3645  *MaxStart = std::max(*MaxStart, Start);
3646  }
3647  } else {
3648  int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
3649  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3650  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3651  }
3652  }
3653  }
3654  }
3655 }
3656 
3657 /// Order the instructions within a cycle so that the definitions occur
3658 /// before the uses. Returns true if the instruction is added to the start
3659 /// of the list, or false if added to the end.
3660 bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3661  std::deque<SUnit *> &Insts) {
3662  MachineInstr *MI = SU->getInstr();
3663  bool OrderBeforeUse = false;
3664  bool OrderAfterDef = false;
3665  bool OrderBeforeDef = false;
3666  unsigned MoveDef = 0;
3667  unsigned MoveUse = 0;
3668  int StageInst1 = stageScheduled(SU);
3669 
3670  unsigned Pos = 0;
3671  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3672  ++I, ++Pos) {
3673  // Relative order of Phis does not matter.
3674  if (MI->isPHI() && (*I)->getInstr()->isPHI())
3675  continue;
3676  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3677  MachineOperand &MO = MI->getOperand(i);
3679  continue;
3680  unsigned Reg = MO.getReg();
3681  unsigned BasePos, OffsetPos;
3682  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3683  if (MI->getOperand(BasePos).getReg() == Reg)
3684  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3685  Reg = NewReg;
3686  bool Reads, Writes;
3687  std::tie(Reads, Writes) =
3688  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3689  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3690  OrderBeforeUse = true;
3691  MoveUse = Pos;
3692  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3693  // Add the instruction after the scheduled instruction.
3694  OrderAfterDef = true;
3695  MoveDef = Pos;
3696  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3697  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3698  OrderBeforeUse = true;
3699  MoveUse = Pos;
3700  } else {
3701  OrderAfterDef = true;
3702  MoveDef = Pos;
3703  }
3704  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3705  OrderBeforeUse = true;
3706  MoveUse = Pos;
3707  if (MoveUse != 0) {
3708  OrderAfterDef = true;
3709  MoveDef = Pos - 1;
3710  }
3711  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3712  // Add the instruction before the scheduled instruction.
3713  OrderBeforeUse = true;
3714  MoveUse = Pos;
3715  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3716  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3717  OrderBeforeDef = true;
3718  MoveUse = Pos;
3719  }
3720  }
3721  // Check for order dependences between instructions. Make sure the source
3722  // is ordered before the destination.
3723  for (auto &S : SU->Succs)
3724  if (S.getKind() == SDep::Order) {
3725  if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3726  OrderBeforeUse = true;
3727  MoveUse = Pos;
3728  }
3729  } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
3730  if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
3731  if (S.isAssignedRegDep()) {
3732  OrderAfterDef = true;
3733  MoveDef = Pos;
3734  }
3735  } else {
3736  OrderBeforeUse = true;
3737  MoveUse = Pos;
3738  }
3739  }
3740  for (auto &P : SU->Preds)
3741  if (P.getKind() == SDep::Order) {
3742  if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
3743  OrderAfterDef = true;
3744  MoveDef = Pos;
3745  }
3746  } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
3747  if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
3748  if (P.isAssignedRegDep()) {
3749  OrderBeforeUse = true;
3750  MoveUse = Pos;
3751  }
3752  } else {
3753  OrderAfterDef = true;
3754  MoveDef = Pos;
3755  }
3756  }
3757  }
3758 
3759  // A circular dependence.
3760  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3761  OrderBeforeUse = false;
3762 
3763  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3764  // to a loop-carried dependence.
3765  if (OrderBeforeDef)
3766  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3767 
3768  // The uncommon case when the instruction order needs to be updated because
3769  // there is both a use and def.
3770  if (OrderBeforeUse && OrderAfterDef) {
3771  SUnit *UseSU = Insts.at(MoveUse);
3772  SUnit *DefSU = Insts.at(MoveDef);
3773  if (MoveUse > MoveDef) {
3774  Insts.erase(Insts.begin() + MoveUse);
3775  Insts.erase(Insts.begin() + MoveDef);
3776  } else {
3777  Insts.erase(Insts.begin() + MoveDef);
3778  Insts.erase(Insts.begin() + MoveUse);
3779  }
3780  if (orderDependence(SSD, UseSU, Insts)) {
3781  Insts.push_front(SU);
3782  orderDependence(SSD, DefSU, Insts);
3783  return true;
3784  }
3785  Insts.pop_back();
3786  Insts.push_back(SU);
3787  Insts.push_back(UseSU);
3788  orderDependence(SSD, DefSU, Insts);
3789  return false;
3790  }
3791  // Put the new instruction first if there is a use in the list. Otherwise,
3792  // put it at the end of the list.
3793  if (OrderBeforeUse)
3794  Insts.push_front(SU);
3795  else
3796  Insts.push_back(SU);
3797  return OrderBeforeUse;
3798 }
3799 
3800 /// Return true if the scheduled Phi has a loop carried operand.
3801 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3802  if (!Phi.isPHI())
3803  return false;
3804  assert(Phi.isPHI() && "Expecing a Phi.");
3805  SUnit *DefSU = SSD->getSUnit(&Phi);
3806  unsigned DefCycle = cycleScheduled(DefSU);
3807  int DefStage = stageScheduled(DefSU);
3808 
3809  unsigned InitVal = 0;
3810  unsigned LoopVal = 0;
3811  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3812  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3813  if (!UseSU)
3814  return true;
3815  if (UseSU->getInstr()->isPHI())
3816  return true;
3817  unsigned LoopCycle = cycleScheduled(UseSU);
3818  int LoopStage = stageScheduled(UseSU);
3819  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3820 }
3821 
3822 /// Return true if the instruction is a definition that is loop carried
3823 /// and defines the use on the next iteration.
3824 /// v1 = phi(v2, v3)
3825 /// (Def) v3 = op v1
3826 /// (MO) = v1
3827 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3828 /// register.
3829 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3831  if (!MO.isReg())
3832  return false;
3833  if (Def->isPHI())
3834  return false;
3835  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3836  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3837  return false;
3838  if (!isLoopCarried(SSD, *Phi))
3839  return false;
3840  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3841  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3842  MachineOperand &DMO = Def->getOperand(i);
3843  if (!DMO.isReg() || !DMO.isDef())
3844  continue;
3845  if (DMO.getReg() == LoopReg)
3846  return true;
3847  }
3848  return false;
3849 }
3850 
3851 // Check if the generated schedule is valid. This function checks if
3852 // an instruction that uses a physical register is scheduled in a
3853 // different stage than the definition. The pipeliner does not handle
3854 // physical register values that may cross a basic block boundary.
3855 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3856  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3857  SUnit &SU = SSD->SUnits[i];
3858  if (!SU.hasPhysRegDefs)
3859  continue;
3860  int StageDef = stageScheduled(&SU);
3861  assert(StageDef != -1 && "Instruction should have been scheduled.");
3862  for (auto &SI : SU.Succs)
3863  if (SI.isAssignedRegDep())
3864  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3865  if (stageScheduled(SI.getSUnit()) != StageDef)
3866  return false;
3867  }
3868  return true;
3869 }
3870 
3871 /// Attempt to fix the degenerate cases when the instruction serialization
3872 /// causes the register lifetimes to overlap. For example,
3873 /// p' = store_pi(p, b)
3874 /// = load p, offset
3875 /// In this case p and p' overlap, which means that two registers are needed.
3876 /// Instead, this function changes the load to use p' and updates the offset.
3877 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3878  unsigned OverlapReg = 0;
3879  unsigned NewBaseReg = 0;
3880  for (SUnit *SU : Instrs) {
3881  MachineInstr *MI = SU->getInstr();
3882  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3883  const MachineOperand &MO = MI->getOperand(i);
3884  // Look for an instruction that uses p. The instruction occurs in the
3885  // same cycle but occurs later in the serialized order.
3886  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3887  // Check that the instruction appears in the InstrChanges structure,
3888  // which contains instructions that can have the offset updated.
3890  InstrChanges.find(SU);
3891  if (It != InstrChanges.end()) {
3892  unsigned BasePos, OffsetPos;
3893  // Update the base register and adjust the offset.
3894  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3895  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3896  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3897  int64_t NewOffset =
3898  MI->getOperand(OffsetPos).getImm() - It->second.second;
3899  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3900  SU->setInstr(NewMI);
3901  MISUnitMap[NewMI] = SU;
3902  NewMIs.insert(NewMI);
3903  }
3904  }
3905  OverlapReg = 0;
3906  NewBaseReg = 0;
3907  break;
3908  }
3909  // Look for an instruction of the form p' = op(p), which uses and defines
3910  // two virtual registers that get allocated to the same physical register.
3911  unsigned TiedUseIdx = 0;
3912  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3913  // OverlapReg is p in the example above.
3914  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3915  // NewBaseReg is p' in the example above.
3916  NewBaseReg = MI->getOperand(i).getReg();
3917  break;
3918  }
3919  }
3920  }
3921 }
3922 
3923 /// After the schedule has been formed, call this function to combine
3924 /// the instructions from the different stages/cycles. That is, this
3925 /// function creates a schedule that represents a single iteration.
3926 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3927  // Move all instructions to the first stage from later stages.
3928  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3929  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3930  ++stage) {
3931  std::deque<SUnit *> &cycleInstrs =
3932  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3933  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3934  E = cycleInstrs.rend();
3935  I != E; ++I)
3936  ScheduledInstrs[cycle].push_front(*I);
3937  }
3938  }
3939  // Iterate over the definitions in each instruction, and compute the
3940  // stage difference for each use. Keep the maximum value.
3941  for (auto &I : InstrToCycle) {
3942  int DefStage = stageScheduled(I.first);
3943  MachineInstr *MI = I.first->getInstr();
3944  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3945  MachineOperand &Op = MI->getOperand(i);
3946  if (!Op.isReg() || !Op.isDef())
3947  continue;
3948 
3949  unsigned Reg = Op.getReg();
3950  unsigned MaxDiff = 0;
3951  bool PhiIsSwapped = false;
3952  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3953  EI = MRI.use_end();
3954  UI != EI; ++UI) {
3955  MachineOperand &UseOp = *UI;
3956  MachineInstr *UseMI = UseOp.getParent();
3957  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3958  int UseStage = stageScheduled(SUnitUse);
3959  unsigned Diff = 0;
3960  if (UseStage != -1 && UseStage >= DefStage)
3961  Diff = UseStage - DefStage;
3962  if (MI->isPHI()) {
3963  if (isLoopCarried(SSD, *MI))
3964  ++Diff;
3965  else
3966  PhiIsSwapped = true;
3967  }
3968  MaxDiff = std::max(Diff, MaxDiff);
3969  }
3970  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3971  }
3972  }
3973 
3974  // Erase all the elements in the later stages. Only one iteration should
3975  // remain in the scheduled list, and it contains all the instructions.
3976  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3977  ScheduledInstrs.erase(cycle);
3978 
3979  // Change the registers in instruction as specified in the InstrChanges
3980  // map. We need to use the new registers to create the correct order.
3981  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3982  SUnit *SU = &SSD->SUnits[i];
3983  SSD->applyInstrChange(SU->getInstr(), *this);
3984  }
3985 
3986  // Reorder the instructions in each cycle to fix and improve the
3987  // generated code.
3988  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3989  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3990  std::deque<SUnit *> newOrderZC;
3991  // Put the zero-cost, pseudo instructions at the start of the cycle.
3992  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3993  SUnit *SU = cycleInstrs[i];
3994  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3995  orderDependence(SSD, SU, newOrderZC);
3996  }
3997  std::deque<SUnit *> newOrderI;
3998  // Then, add the regular instructions back.
3999  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4000  SUnit *SU = cycleInstrs[i];
4001  if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
4002  orderDependence(SSD, SU, newOrderI);
4003  }
4004  // Replace the old order with the new order.
4005  cycleInstrs.swap(newOrderZC);
4006  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
4007  SSD->fixupRegisterOverlaps(cycleInstrs);
4008  }
4009 
4010  DEBUG(dump(););
4011 }
4012 
4013 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4014 /// Print the schedule information to the given output.
4015 void SMSchedule::print(raw_ostream &os) const {
4016  // Iterate over each cycle.
4017  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4018  // Iterate over each instruction in the cycle.
4019  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
4020  for (SUnit *CI : cycleInstrs->second) {
4021  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
4022  os << "(" << CI->NodeNum << ") ";
4023  CI->getInstr()->print(os);
4024  os << "\n";
4025  }
4026  }
4027 }
4028 
4029 /// Utility function used for debugging to print the schedule.
4030 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
4031 #endif
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:755
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:244
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
BitVector & set()
Definition: BitVector.h:398
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:327
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:85
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:458
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:235
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:807
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void dump(const ScheduleDAG *G) const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:403
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isInlineAsm() const
Definition: MachineInstr.h:832
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
The two locations do not alias at all.
Definition: AliasAnalysis.h:85
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:396
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
bool isPHI() const
Definition: MachineInstr.h:826
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:265
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:264
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:222
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
BlockT * getHeader() const
Definition: LoopInfo.h:100
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:427
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:829
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
iterator find(const KeyT &Key)
Definition: MapVector.h:144
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
TargetInstrInfo - Interface to description of machine instruction set.
const Value * getValue() const
Return the base address of the memory access.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:565
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:639
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base register and byte offset of an instruction that reads/writes memory. ...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
Represent the analysis usage information of a pass.
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:404
BitVector & reset()
Definition: BitVector.h:439
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:50
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
print lazy value Lazy Value Info Printer Pass
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:512
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:389
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
MachineInstrBuilder MachineInstrBuilder & DefMI
static LaneBitmask getNone()
Definition: LaneBitmask.h:83
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:748
void clear()
Completely clear the SetVector.
Definition: SetVector.h:216
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2)
Return true if Inst1 defines a value that is used in Inst2.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:328
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:267
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:210
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:482
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1948
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:59
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
Basic Alias true
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
void print(raw_ostream &OS, bool SkipOpers=false, bool SkipDebugLoc=false, const TargetInstrInfo *TII=nullptr) const
Debugging supportPrint this MI to OS.
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:626
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:68
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
mop_iterator operands_begin()
Definition: MachineInstr.h:326
A vector that has set insertion semantics.
Definition: SetVector.h:41
static use_instr_iterator use_instr_end()
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:200
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1946
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the the loop block.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr&#39;s memory reference descriptor list.
std::vector< MachineBasicBlock * >::iterator succ_iterator
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:390
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:436