LLVM  8.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlap loop iterations and exploits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
21 // Conference on Parallel Architectures and Compilation Techiniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Chambpain, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and the check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/MapVector.h"
65 #include "llvm/ADT/PriorityQueue.h"
66 #include "llvm/ADT/SetVector.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/Statistic.h"
96 #include "llvm/Config/llvm-config.h"
97 #include "llvm/IR/Attributes.h"
98 #include "llvm/IR/DebugLoc.h"
99 #include "llvm/IR/Function.h"
100 #include "llvm/MC/LaneBitmask.h"
101 #include "llvm/MC/MCInstrDesc.h"
103 #include "llvm/MC/MCRegisterInfo.h"
104 #include "llvm/Pass.h"
106 #include "llvm/Support/Compiler.h"
107 #include "llvm/Support/Debug.h"
108 #include "llvm/Support/MathExtras.h"
110 #include <algorithm>
111 #include <cassert>
112 #include <climits>
113 #include <cstdint>
114 #include <deque>
115 #include <functional>
116 #include <iterator>
117 #include <map>
118 #include <memory>
119 #include <tuple>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
124 
125 #define DEBUG_TYPE "pipeliner"
126 
127 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
128 STATISTIC(NumPipelined, "Number of loops software pipelined");
129 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
130 
131 /// A command line option to turn software pipelining on or off.
132 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
134  cl::desc("Enable Software Pipelining"));
135 
136 /// A command line option to enable SWP at -Os.
137 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
138  cl::desc("Enable SWP at Os."), cl::Hidden,
139  cl::init(false));
140 
141 /// A command line argument to limit minimum initial interval for pipelining.
142 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
143  cl::desc("Size limit for the MII."),
144  cl::Hidden, cl::init(27));
145 
146 /// A command line argument to limit the number of stages in the pipeline.
147 static cl::opt<int>
148  SwpMaxStages("pipeliner-max-stages",
149  cl::desc("Maximum stages allowed in the generated scheduled."),
150  cl::Hidden, cl::init(3));
151 
152 /// A command line option to disable the pruning of chain dependences due to
153 /// an unrelated Phi.
154 static cl::opt<bool>
155  SwpPruneDeps("pipeliner-prune-deps",
156  cl::desc("Prune dependences between unrelated Phi nodes."),
157  cl::Hidden, cl::init(true));
158 
159 /// A command line option to disable the pruning of loop carried order
160 /// dependences.
161 static cl::opt<bool>
162  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
163  cl::desc("Prune loop carried order dependences."),
164  cl::Hidden, cl::init(true));
165 
166 #ifndef NDEBUG
167 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
168 #endif
169 
170 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
171  cl::ReallyHidden, cl::init(false),
172  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
173 
174 // A command line option to enable the CopyToPhi DAG mutation.
175 static cl::opt<bool>
176  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
177  cl::init(true), cl::ZeroOrMore,
178  cl::desc("Enable CopyToPhi DAG Mutation"));
179 
180 namespace {
181 
182 class NodeSet;
183 class SMSchedule;
184 
185 /// The main class in the implementation of the target independent
186 /// software pipeliner pass.
187 class MachinePipeliner : public MachineFunctionPass {
188 public:
189  MachineFunction *MF = nullptr;
190  const MachineLoopInfo *MLI = nullptr;
191  const MachineDominatorTree *MDT = nullptr;
192  const InstrItineraryData *InstrItins;
193  const TargetInstrInfo *TII = nullptr;
194  RegisterClassInfo RegClassInfo;
195 
196 #ifndef NDEBUG
197  static int NumTries;
198 #endif
199 
200  /// Cache the target analysis information about the loop.
201  struct LoopInfo {
202  MachineBasicBlock *TBB = nullptr;
203  MachineBasicBlock *FBB = nullptr;
205  MachineInstr *LoopInductionVar = nullptr;
206  MachineInstr *LoopCompare = nullptr;
207  };
208  LoopInfo LI;
209 
210  static char ID;
211 
212  MachinePipeliner() : MachineFunctionPass(ID) {
214  }
215 
216  bool runOnMachineFunction(MachineFunction &MF) override;
217 
218  void getAnalysisUsage(AnalysisUsage &AU) const override {
225  }
226 
227 private:
228  void preprocessPhiNodes(MachineBasicBlock &B);
229  bool canPipelineLoop(MachineLoop &L);
230  bool scheduleLoop(MachineLoop &L);
231  bool swingModuloScheduler(MachineLoop &L);
232 };
233 
234 /// This class builds the dependence graph for the instructions in a loop,
235 /// and attempts to schedule the instructions using the SMS algorithm.
236 class SwingSchedulerDAG : public ScheduleDAGInstrs {
237  MachinePipeliner &Pass;
238  /// The minimum initiation interval between iterations for this schedule.
239  unsigned MII = 0;
240  /// Set to true if a valid pipelined schedule is found for the loop.
241  bool Scheduled = false;
242  MachineLoop &Loop;
243  LiveIntervals &LIS;
244  const RegisterClassInfo &RegClassInfo;
245 
246  /// A toplogical ordering of the SUnits, which is needed for changing
247  /// dependences and iterating over the SUnits.
249 
250  struct NodeInfo {
251  int ASAP = 0;
252  int ALAP = 0;
253  int ZeroLatencyDepth = 0;
254  int ZeroLatencyHeight = 0;
255 
256  NodeInfo() = default;
257  };
258  /// Computed properties for each node in the graph.
259  std::vector<NodeInfo> ScheduleInfo;
260 
261  enum OrderKind { BottomUp = 0, TopDown = 1 };
262  /// Computed node ordering for scheduling.
264 
265  using NodeSetType = SmallVector<NodeSet, 8>;
266  using ValueMapTy = DenseMap<unsigned, unsigned>;
267  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
268  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
269 
270  /// Instructions to change when emitting the final schedule.
272 
273  /// We may create a new instruction, so remember it because it
274  /// must be deleted when the pass is finished.
276 
277  /// Ordered list of DAG postprocessing steps.
278  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
279 
280  /// Helper class to implement Johnson's circuit finding algorithm.
281  class Circuits {
282  std::vector<SUnit> &SUnits;
283  SetVector<SUnit *> Stack;
284  BitVector Blocked;
287  // Node to Index from ScheduleDAGTopologicalSort
288  std::vector<int> *Node2Idx;
289  unsigned NumPaths;
290  static unsigned MaxPaths;
291 
292  public:
293  Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
294  : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
295  Node2Idx = new std::vector<int>(SUs.size());
296  unsigned Idx = 0;
297  for (const auto &NodeNum : Topo)
298  Node2Idx->at(NodeNum) = Idx++;
299  }
300 
301  ~Circuits() { delete Node2Idx; }
302 
303  /// Reset the data structures used in the circuit algorithm.
304  void reset() {
305  Stack.clear();
306  Blocked.reset();
307  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
308  NumPaths = 0;
309  }
310 
311  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
312  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
313  void unblock(int U);
314  };
315 
316  struct CopyToPhiMutation : public ScheduleDAGMutation {
317  void apply(ScheduleDAGInstrs *DAG) override;
318  };
319 
320 public:
321  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
322  const RegisterClassInfo &rci)
323  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
324  RegClassInfo(rci), Topo(SUnits, &ExitSU) {
325  P.MF->getSubtarget().getSMSMutations(Mutations);
326  if (SwpEnableCopyToPhi)
327  Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
328  }
329 
330  void schedule() override;
331  void finishBlock() override;
332 
333  /// Return true if the loop kernel has been scheduled.
334  bool hasNewSchedule() { return Scheduled; }
335 
336  /// Return the earliest time an instruction may be scheduled.
337  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
338 
339  /// Return the latest time an instruction my be scheduled.
340  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
341 
342  /// The mobility function, which the number of slots in which
343  /// an instruction may be scheduled.
344  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
345 
346  /// The depth, in the dependence graph, for a node.
347  unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
348 
349  /// The maximum unweighted length of a path from an arbitrary node to the
350  /// given node in which each edge has latency 0
351  int getZeroLatencyDepth(SUnit *Node) {
352  return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
353  }
354 
355  /// The height, in the dependence graph, for a node.
356  unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
357 
358  /// The maximum unweighted length of a path from the given node to an
359  /// arbitrary node in which each edge has latency 0
360  int getZeroLatencyHeight(SUnit *Node) {
361  return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
362  }
363 
364  /// Return true if the dependence is a back-edge in the data dependence graph.
365  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
366  /// using an anti dependence from a Phi to an instruction.
367  bool isBackedge(SUnit *Source, const SDep &Dep) {
368  if (Dep.getKind() != SDep::Anti)
369  return false;
370  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
371  }
372 
373  bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
374 
375  /// The distance function, which indicates that operation V of iteration I
376  /// depends on operations U of iteration I-distance.
377  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
378  // Instructions that feed a Phi have a distance of 1. Computing larger
379  // values for arrays requires data dependence information.
380  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
381  return 1;
382  return 0;
383  }
384 
385  /// Set the Minimum Initiation Interval for this schedule attempt.
386  void setMII(unsigned mii) { MII = mii; }
387 
388  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
389 
390  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
391 
392  /// Return the new base register that was stored away for the changed
393  /// instruction.
394  unsigned getInstrBaseReg(SUnit *SU) {
396  InstrChanges.find(SU);
397  if (It != InstrChanges.end())
398  return It->second.first;
399  return 0;
400  }
401 
402  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
403  Mutations.push_back(std::move(Mutation));
404  }
405 
406  static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
407 
408 private:
409  void addLoopCarriedDependences(AliasAnalysis *AA);
410  void updatePhiDependences();
411  void changeDependences();
412  unsigned calculateResMII();
413  unsigned calculateRecMII(NodeSetType &RecNodeSets);
414  void findCircuits(NodeSetType &NodeSets);
415  void fuseRecs(NodeSetType &NodeSets);
416  void removeDuplicateNodes(NodeSetType &NodeSets);
417  void computeNodeFunctions(NodeSetType &NodeSets);
418  void registerPressureFilter(NodeSetType &NodeSets);
419  void colocateNodeSets(NodeSetType &NodeSets);
420  void checkNodeSets(NodeSetType &NodeSets);
421  void groupRemainingNodes(NodeSetType &NodeSets);
422  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
423  SetVector<SUnit *> &NodesAdded);
424  void computeNodeOrder(NodeSetType &NodeSets);
425  void checkValidNodeOrder(const NodeSetType &Circuits) const;
426  bool schedulePipeline(SMSchedule &Schedule);
427  void generatePipelinedLoop(SMSchedule &Schedule);
428  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
429  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
430  MBBVectorTy &PrologBBs);
431  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
432  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
433  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
434  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
435  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
436  SMSchedule &Schedule, ValueMapTy *VRMap,
437  InstrMapTy &InstrMap, unsigned LastStageNum,
438  unsigned CurStageNum, bool IsLast);
439  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
440  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
441  SMSchedule &Schedule, ValueMapTy *VRMap,
442  InstrMapTy &InstrMap, unsigned LastStageNum,
443  unsigned CurStageNum, bool IsLast);
444  void removeDeadInstructions(MachineBasicBlock *KernelBB,
445  MBBVectorTy &EpilogBBs);
446  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
447  SMSchedule &Schedule);
448  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
449  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
450  ValueMapTy *VRMap);
451  bool computeDelta(MachineInstr &MI, unsigned &Delta);
452  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
453  unsigned Num);
454  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
455  unsigned InstStageNum);
456  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
457  unsigned InstStageNum,
458  SMSchedule &Schedule);
459  void updateInstruction(MachineInstr *NewMI, bool LastDef,
460  unsigned CurStageNum, unsigned InstrStageNum,
461  SMSchedule &Schedule, ValueMapTy *VRMap);
462  MachineInstr *findDefInLoop(unsigned Reg);
463  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
464  unsigned LoopStage, ValueMapTy *VRMap,
465  MachineBasicBlock *BB);
466  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
467  SMSchedule &Schedule, ValueMapTy *VRMap,
468  InstrMapTy &InstrMap);
469  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
470  InstrMapTy &InstrMap, unsigned CurStageNum,
471  unsigned PhiNum, MachineInstr *Phi,
472  unsigned OldReg, unsigned NewReg,
473  unsigned PrevReg = 0);
474  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
475  unsigned &OffsetPos, unsigned &NewBase,
476  int64_t &NewOffset);
477  void postprocessDAG();
478 };
479 
480 /// A NodeSet contains a set of SUnit DAG nodes with additional information
481 /// that assigns a priority to the set.
482 class NodeSet {
483  SetVector<SUnit *> Nodes;
484  bool HasRecurrence = false;
485  unsigned RecMII = 0;
486  int MaxMOV = 0;
487  unsigned MaxDepth = 0;
488  unsigned Colocate = 0;
489  SUnit *ExceedPressure = nullptr;
490  unsigned Latency = 0;
491 
492 public:
493  using iterator = SetVector<SUnit *>::const_iterator;
494 
495  NodeSet() = default;
496  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
497  Latency = 0;
498  for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
499  for (const SDep &Succ : Nodes[i]->Succs)
500  if (Nodes.count(Succ.getSUnit()))
501  Latency += Succ.getLatency();
502  }
503 
504  bool insert(SUnit *SU) { return Nodes.insert(SU); }
505 
506  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
507 
508  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
509  return Nodes.remove_if(P);
510  }
511 
512  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
513 
514  bool hasRecurrence() { return HasRecurrence; };
515 
516  unsigned size() const { return Nodes.size(); }
517 
518  bool empty() const { return Nodes.empty(); }
519 
520  SUnit *getNode(unsigned i) const { return Nodes[i]; };
521 
522  void setRecMII(unsigned mii) { RecMII = mii; };
523 
524  void setColocate(unsigned c) { Colocate = c; };
525 
526  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
527 
528  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
529 
530  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
531 
532  int getRecMII() { return RecMII; }
533 
534  /// Summarize node functions for the entire node set.
535  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
536  for (SUnit *SU : *this) {
537  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
538  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
539  }
540  }
541 
542  unsigned getLatency() { return Latency; }
543 
544  unsigned getMaxDepth() { return MaxDepth; }
545 
546  void clear() {
547  Nodes.clear();
548  RecMII = 0;
549  HasRecurrence = false;
550  MaxMOV = 0;
551  MaxDepth = 0;
552  Colocate = 0;
553  ExceedPressure = nullptr;
554  }
555 
556  operator SetVector<SUnit *> &() { return Nodes; }
557 
558  /// Sort the node sets by importance. First, rank them by recurrence MII,
559  /// then by mobility (least mobile done first), and finally by depth.
560  /// Each node set may contain a colocate value which is used as the first
561  /// tie breaker, if it's set.
562  bool operator>(const NodeSet &RHS) const {
563  if (RecMII == RHS.RecMII) {
564  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
565  return Colocate < RHS.Colocate;
566  if (MaxMOV == RHS.MaxMOV)
567  return MaxDepth > RHS.MaxDepth;
568  return MaxMOV < RHS.MaxMOV;
569  }
570  return RecMII > RHS.RecMII;
571  }
572 
573  bool operator==(const NodeSet &RHS) const {
574  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
575  MaxDepth == RHS.MaxDepth;
576  }
577 
578  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
579 
580  iterator begin() { return Nodes.begin(); }
581  iterator end() { return Nodes.end(); }
582 
583  void print(raw_ostream &os) const {
584  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
585  << " depth " << MaxDepth << " col " << Colocate << "\n";
586  for (const auto &I : Nodes)
587  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
588  os << "\n";
589  }
590 
591 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
592  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
593 #endif
594 };
595 
596 /// This class represents the scheduled code. The main data structure is a
597 /// map from scheduled cycle to instructions. During scheduling, the
598 /// data structure explicitly represents all stages/iterations. When
599 /// the algorithm finshes, the schedule is collapsed into a single stage,
600 /// which represents instructions from different loop iterations.
601 ///
602 /// The SMS algorithm allows negative values for cycles, so the first cycle
603 /// in the schedule is the smallest cycle value.
604 class SMSchedule {
605 private:
606  /// Map from execution cycle to instructions.
607  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
608 
609  /// Map from instruction to execution cycle.
610  std::map<SUnit *, int> InstrToCycle;
611 
612  /// Map for each register and the max difference between its uses and def.
613  /// The first element in the pair is the max difference in stages. The
614  /// second is true if the register defines a Phi value and loop value is
615  /// scheduled before the Phi.
616  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
617 
618  /// Keep track of the first cycle value in the schedule. It starts
619  /// as zero, but the algorithm allows negative values.
620  int FirstCycle = 0;
621 
622  /// Keep track of the last cycle value in the schedule.
623  int LastCycle = 0;
624 
625  /// The initiation interval (II) for the schedule.
626  int InitiationInterval = 0;
627 
628  /// Target machine information.
629  const TargetSubtargetInfo &ST;
630 
631  /// Virtual register information.
633 
634  std::unique_ptr<DFAPacketizer> Resources;
635 
636 public:
637  SMSchedule(MachineFunction *mf)
638  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
639  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
640 
641  void reset() {
642  ScheduledInstrs.clear();
643  InstrToCycle.clear();
644  RegToStageDiff.clear();
645  FirstCycle = 0;
646  LastCycle = 0;
647  InitiationInterval = 0;
648  }
649 
650  /// Set the initiation interval for this schedule.
651  void setInitiationInterval(int ii) { InitiationInterval = ii; }
652 
653  /// Return the first cycle in the completed schedule. This
654  /// can be a negative value.
655  int getFirstCycle() const { return FirstCycle; }
656 
657  /// Return the last cycle in the finalized schedule.
658  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
659 
660  /// Return the cycle of the earliest scheduled instruction in the dependence
661  /// chain.
662  int earliestCycleInChain(const SDep &Dep);
663 
664  /// Return the cycle of the latest scheduled instruction in the dependence
665  /// chain.
666  int latestCycleInChain(const SDep &Dep);
667 
668  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
669  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
670  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
671 
672  /// Iterators for the cycle to instruction map.
673  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
674  using const_sched_iterator =
675  DenseMap<int, std::deque<SUnit *>>::const_iterator;
676 
677  /// Return true if the instruction is scheduled at the specified stage.
678  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
679  return (stageScheduled(SU) == (int)StageNum);
680  }
681 
682  /// Return the stage for a scheduled instruction. Return -1 if
683  /// the instruction has not been scheduled.
684  int stageScheduled(SUnit *SU) const {
685  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
686  if (it == InstrToCycle.end())
687  return -1;
688  return (it->second - FirstCycle) / InitiationInterval;
689  }
690 
691  /// Return the cycle for a scheduled instruction. This function normalizes
692  /// the first cycle to be 0.
693  unsigned cycleScheduled(SUnit *SU) const {
694  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
695  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
696  return (it->second - FirstCycle) % InitiationInterval;
697  }
698 
699  /// Return the maximum stage count needed for this schedule.
700  unsigned getMaxStageCount() {
701  return (LastCycle - FirstCycle) / InitiationInterval;
702  }
703 
704  /// Return the max. number of stages/iterations that can occur between a
705  /// register definition and its uses.
706  unsigned getStagesForReg(int Reg, unsigned CurStage) {
707  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
708  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
709  return 1;
710  return Stages.first;
711  }
712 
713  /// The number of stages for a Phi is a little different than other
714  /// instructions. The minimum value computed in RegToStageDiff is 1
715  /// because we assume the Phi is needed for at least 1 iteration.
716  /// This is not the case if the loop value is scheduled prior to the
717  /// Phi in the same stage. This function returns the number of stages
718  /// or iterations needed between the Phi definition and any uses.
719  unsigned getStagesForPhi(int Reg) {
720  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
721  if (Stages.second)
722  return Stages.first;
723  return Stages.first - 1;
724  }
725 
726  /// Return the instructions that are scheduled at the specified cycle.
727  std::deque<SUnit *> &getInstructions(int cycle) {
728  return ScheduledInstrs[cycle];
729  }
730 
731  bool isValidSchedule(SwingSchedulerDAG *SSD);
732  void finalizeSchedule(SwingSchedulerDAG *SSD);
733  void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
734  std::deque<SUnit *> &Insts);
735  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
736  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
737  MachineOperand &MO);
738  void print(raw_ostream &os) const;
739  void dump() const;
740 };
741 
742 } // end anonymous namespace
743 
744 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
745 char MachinePipeliner::ID = 0;
746 #ifndef NDEBUG
747 int MachinePipeliner::NumTries = 0;
748 #endif
750 
751 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
752  "Modulo Software Pipelining", false, false)
757 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
758  "Modulo Software Pipelining", false, false)
759 
760 /// The "main" function for implementing Swing Modulo Scheduling.
761 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
762  if (skipFunction(mf.getFunction()))
763  return false;
764 
765  if (!EnableSWP)
766  return false;
767 
768  if (mf.getFunction().getAttributes().hasAttribute(
769  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
770  !EnableSWPOptSize.getPosition())
771  return false;
772 
773  MF = &mf;
774  MLI = &getAnalysis<MachineLoopInfo>();
775  MDT = &getAnalysis<MachineDominatorTree>();
776  TII = MF->getSubtarget().getInstrInfo();
777  RegClassInfo.runOnMachineFunction(*MF);
778 
779  for (auto &L : *MLI)
780  scheduleLoop(*L);
781 
782  return false;
783 }
784 
785 /// Attempt to perform the SMS algorithm on the specified loop. This function is
786 /// the main entry point for the algorithm. The function identifies candidate
787 /// loops, calculates the minimum initiation interval, and attempts to schedule
788 /// the loop.
789 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
790  bool Changed = false;
791  for (auto &InnerLoop : L)
792  Changed |= scheduleLoop(*InnerLoop);
793 
794 #ifndef NDEBUG
795  // Stop trying after reaching the limit (if any).
796  int Limit = SwpLoopLimit;
797  if (Limit >= 0) {
798  if (NumTries >= SwpLoopLimit)
799  return Changed;
800  NumTries++;
801  }
802 #endif
803 
804  if (!canPipelineLoop(L))
805  return Changed;
806 
807  ++NumTrytoPipeline;
808 
809  Changed = swingModuloScheduler(L);
810 
811  return Changed;
812 }
813 
814 /// Return true if the loop can be software pipelined. The algorithm is
815 /// restricted to loops with a single basic block. Make sure that the
816 /// branch in the loop can be analyzed.
817 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
818  if (L.getNumBlocks() != 1)
819  return false;
820 
821  // Check if the branch can't be understood because we can't do pipelining
822  // if that's the case.
823  LI.TBB = nullptr;
824  LI.FBB = nullptr;
825  LI.BrCond.clear();
826  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
827  return false;
828 
829  LI.LoopInductionVar = nullptr;
830  LI.LoopCompare = nullptr;
831  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
832  return false;
833 
834  if (!L.getLoopPreheader())
835  return false;
836 
837  // Remove any subregisters from inputs to phi nodes.
838  preprocessPhiNodes(*L.getHeader());
839  return true;
840 }
841 
842 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
843  MachineRegisterInfo &MRI = MF->getRegInfo();
844  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
845 
846  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
847  MachineOperand &DefOp = PI.getOperand(0);
848  assert(DefOp.getSubReg() == 0);
849  auto *RC = MRI.getRegClass(DefOp.getReg());
850 
851  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
852  MachineOperand &RegOp = PI.getOperand(i);
853  if (RegOp.getSubReg() == 0)
854  continue;
855 
856  // If the operand uses a subregister, replace it with a new register
857  // without subregisters, and generate a copy to the new register.
858  unsigned NewReg = MRI.createVirtualRegister(RC);
859  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
861  const DebugLoc &DL = PredB.findDebugLoc(At);
862  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
863  .addReg(RegOp.getReg(), getRegState(RegOp),
864  RegOp.getSubReg());
865  Slots.insertMachineInstrInMaps(*Copy);
866  RegOp.setReg(NewReg);
867  RegOp.setSubReg(0);
868  }
869  }
870 }
871 
872 /// The SMS algorithm consists of the following main steps:
873 /// 1. Computation and analysis of the dependence graph.
874 /// 2. Ordering of the nodes (instructions).
875 /// 3. Attempt to Schedule the loop.
876 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
877  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
878 
879  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
880 
881  MachineBasicBlock *MBB = L.getHeader();
882  // The kernel should not include any terminator instructions. These
883  // will be added back later.
884  SMS.startBlock(MBB);
885 
886  // Compute the number of 'real' instructions in the basic block by
887  // ignoring terminators.
888  unsigned size = MBB->size();
890  E = MBB->instr_end();
891  I != E; ++I, --size)
892  ;
893 
894  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
895  SMS.schedule();
896  SMS.exitRegion();
897 
898  SMS.finishBlock();
899  return SMS.hasNewSchedule();
900 }
901 
902 /// We override the schedule function in ScheduleDAGInstrs to implement the
903 /// scheduling part of the Swing Modulo Scheduling algorithm.
904 void SwingSchedulerDAG::schedule() {
905  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
906  buildSchedGraph(AA);
907  addLoopCarriedDependences(AA);
908  updatePhiDependences();
909  Topo.InitDAGTopologicalSorting();
910  changeDependences();
911  postprocessDAG();
912  LLVM_DEBUG(dump());
913 
914  NodeSetType NodeSets;
915  findCircuits(NodeSets);
916  NodeSetType Circuits = NodeSets;
917 
918  // Calculate the MII.
919  unsigned ResMII = calculateResMII();
920  unsigned RecMII = calculateRecMII(NodeSets);
921 
922  fuseRecs(NodeSets);
923 
924  // This flag is used for testing and can cause correctness problems.
925  if (SwpIgnoreRecMII)
926  RecMII = 0;
927 
928  MII = std::max(ResMII, RecMII);
929  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
930  << ", res=" << ResMII << ")\n");
931 
932  // Can't schedule a loop without a valid MII.
933  if (MII == 0)
934  return;
935 
936  // Don't pipeline large loops.
937  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
938  return;
939 
940  computeNodeFunctions(NodeSets);
941 
942  registerPressureFilter(NodeSets);
943 
944  colocateNodeSets(NodeSets);
945 
946  checkNodeSets(NodeSets);
947 
948  LLVM_DEBUG({
949  for (auto &I : NodeSets) {
950  dbgs() << " Rec NodeSet ";
951  I.dump();
952  }
953  });
954 
955  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
956 
957  groupRemainingNodes(NodeSets);
958 
959  removeDuplicateNodes(NodeSets);
960 
961  LLVM_DEBUG({
962  for (auto &I : NodeSets) {
963  dbgs() << " NodeSet ";
964  I.dump();
965  }
966  });
967 
968  computeNodeOrder(NodeSets);
969 
970  // check for node order issues
971  checkValidNodeOrder(Circuits);
972 
973  SMSchedule Schedule(Pass.MF);
974  Scheduled = schedulePipeline(Schedule);
975 
976  if (!Scheduled)
977  return;
978 
979  unsigned numStages = Schedule.getMaxStageCount();
980  // No need to generate pipeline if there are no overlapped iterations.
981  if (numStages == 0)
982  return;
983 
984  // Check that the maximum stage count is less than user-defined limit.
985  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
986  return;
987 
988  generatePipelinedLoop(Schedule);
989  ++NumPipelined;
990 }
991 
992 /// Clean up after the software pipeliner runs.
993 void SwingSchedulerDAG::finishBlock() {
994  for (MachineInstr *I : NewMIs)
995  MF.DeleteMachineInstr(I);
996  NewMIs.clear();
997 
998  // Call the superclass.
1000 }
1001 
1002 /// Return the register values for the operands of a Phi instruction.
1003 /// This function assume the instruction is a Phi.
1005  unsigned &InitVal, unsigned &LoopVal) {
1006  assert(Phi.isPHI() && "Expecting a Phi.");
1007 
1008  InitVal = 0;
1009  LoopVal = 0;
1010  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1011  if (Phi.getOperand(i + 1).getMBB() != Loop)
1012  InitVal = Phi.getOperand(i).getReg();
1013  else
1014  LoopVal = Phi.getOperand(i).getReg();
1015 
1016  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
1017 }
1018 
1019 /// Return the Phi register value that comes from the incoming block.
1020 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1021  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1022  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
1023  return Phi.getOperand(i).getReg();
1024  return 0;
1025 }
1026 
1027 /// Return the Phi register value that comes the loop block.
1028 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1029  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1030  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
1031  return Phi.getOperand(i).getReg();
1032  return 0;
1033 }
1034 
1035 /// Return true if SUb can be reached from SUa following the chain edges.
1036 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
1037  SmallPtrSet<SUnit *, 8> Visited;
1038  SmallVector<SUnit *, 8> Worklist;
1039  Worklist.push_back(SUa);
1040  while (!Worklist.empty()) {
1041  const SUnit *SU = Worklist.pop_back_val();
1042  for (auto &SI : SU->Succs) {
1043  SUnit *SuccSU = SI.getSUnit();
1044  if (SI.getKind() == SDep::Order) {
1045  if (Visited.count(SuccSU))
1046  continue;
1047  if (SuccSU == SUb)
1048  return true;
1049  Worklist.push_back(SuccSU);
1050  Visited.insert(SuccSU);
1051  }
1052  }
1053  }
1054  return false;
1055 }
1056 
1057 /// Return true if the instruction causes a chain between memory
1058 /// references before and after it.
1060  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1061  (MI.hasOrderedMemoryRef() &&
1062  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1063 }
1064 
1065 /// Return the underlying objects for the memory references of an instruction.
1066 /// This function calls the code in ValueTracking, but first checks that the
1067 /// instruction has a memory operand.
1070  const DataLayout &DL) {
1071  if (!MI->hasOneMemOperand())
1072  return;
1073  MachineMemOperand *MM = *MI->memoperands_begin();
1074  if (!MM->getValue())
1075  return;
1076  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1077  for (Value *V : Objs) {
1078  if (!isIdentifiedObject(V)) {
1079  Objs.clear();
1080  return;
1081  }
1082  Objs.push_back(V);
1083  }
1084 }
1085 
1086 /// Add a chain edge between a load and store if the store can be an
1087 /// alias of the load on a subsequent iteration, i.e., a loop carried
1088 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1089 /// but that code doesn't create loop carried dependences.
1090 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1092  Value *UnknownValue =
1094  for (auto &SU : SUnits) {
1095  MachineInstr &MI = *SU.getInstr();
1096  if (isDependenceBarrier(MI, AA))
1097  PendingLoads.clear();
1098  else if (MI.mayLoad()) {
1100  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1101  if (Objs.empty())
1102  Objs.push_back(UnknownValue);
1103  for (auto V : Objs) {
1104  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1105  SUs.push_back(&SU);
1106  }
1107  } else if (MI.mayStore()) {
1109  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1110  if (Objs.empty())
1111  Objs.push_back(UnknownValue);
1112  for (auto V : Objs) {
1114  PendingLoads.find(V);
1115  if (I == PendingLoads.end())
1116  continue;
1117  for (auto Load : I->second) {
1118  if (isSuccOrder(Load, &SU))
1119  continue;
1120  MachineInstr &LdMI = *Load->getInstr();
1121  // First, perform the cheaper check that compares the base register.
1122  // If they are the same and the load offset is less than the store
1123  // offset, then mark the dependence as loop carried potentially.
1124  MachineOperand *BaseOp1, *BaseOp2;
1125  int64_t Offset1, Offset2;
1126  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
1127  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
1128  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
1129  (int)Offset1 < (int)Offset2) {
1130  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1131  "What happened to the chain edge?");
1132  SDep Dep(Load, SDep::Barrier);
1133  Dep.setLatency(1);
1134  SU.addPred(Dep);
1135  continue;
1136  }
1137  }
1138  // Second, the more expensive check that uses alias analysis on the
1139  // base registers. If they alias, and the load offset is less than
1140  // the store offset, the mark the dependence as loop carried.
1141  if (!AA) {
1142  SDep Dep(Load, SDep::Barrier);
1143  Dep.setLatency(1);
1144  SU.addPred(Dep);
1145  continue;
1146  }
1147  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1148  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1149  if (!MMO1->getValue() || !MMO2->getValue()) {
1150  SDep Dep(Load, SDep::Barrier);
1151  Dep.setLatency(1);
1152  SU.addPred(Dep);
1153  continue;
1154  }
1155  if (MMO1->getValue() == MMO2->getValue() &&
1156  MMO1->getOffset() <= MMO2->getOffset()) {
1157  SDep Dep(Load, SDep::Barrier);
1158  Dep.setLatency(1);
1159  SU.addPred(Dep);
1160  continue;
1161  }
1162  AliasResult AAResult = AA->alias(
1164  MMO1->getAAInfo()),
1166  MMO2->getAAInfo()));
1167 
1168  if (AAResult != NoAlias) {
1169  SDep Dep(Load, SDep::Barrier);
1170  Dep.setLatency(1);
1171  SU.addPred(Dep);
1172  }
1173  }
1174  }
1175  }
1176  }
1177 }
1178 
1179 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1180 /// processes dependences for PHIs. This function adds true dependences
1181 /// from a PHI to a use, and a loop carried dependence from the use to the
1182 /// PHI. The loop carried dependence is represented as an anti dependence
1183 /// edge. This function also removes chain dependences between unrelated
1184 /// PHIs.
1185 void SwingSchedulerDAG::updatePhiDependences() {
1186  SmallVector<SDep, 4> RemoveDeps;
1188 
1189  // Iterate over each DAG node.
1190  for (SUnit &I : SUnits) {
1191  RemoveDeps.clear();
1192  // Set to true if the instruction has an operand defined by a Phi.
1193  unsigned HasPhiUse = 0;
1194  unsigned HasPhiDef = 0;
1195  MachineInstr *MI = I.getInstr();
1196  // Iterate over each operand, and we process the definitions.
1197  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1198  MOE = MI->operands_end();
1199  MOI != MOE; ++MOI) {
1200  if (!MOI->isReg())
1201  continue;
1202  unsigned Reg = MOI->getReg();
1203  if (MOI->isDef()) {
1204  // If the register is used by a Phi, then create an anti dependence.
1206  UI = MRI.use_instr_begin(Reg),
1207  UE = MRI.use_instr_end();
1208  UI != UE; ++UI) {
1209  MachineInstr *UseMI = &*UI;
1210  SUnit *SU = getSUnit(UseMI);
1211  if (SU != nullptr && UseMI->isPHI()) {
1212  if (!MI->isPHI()) {
1213  SDep Dep(SU, SDep::Anti, Reg);
1214  Dep.setLatency(1);
1215  I.addPred(Dep);
1216  } else {
1217  HasPhiDef = Reg;
1218  // Add a chain edge to a dependent Phi that isn't an existing
1219  // predecessor.
1220  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1221  I.addPred(SDep(SU, SDep::Barrier));
1222  }
1223  }
1224  }
1225  } else if (MOI->isUse()) {
1226  // If the register is defined by a Phi, then create a true dependence.
1227  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1228  if (DefMI == nullptr)
1229  continue;
1230  SUnit *SU = getSUnit(DefMI);
1231  if (SU != nullptr && DefMI->isPHI()) {
1232  if (!MI->isPHI()) {
1233  SDep Dep(SU, SDep::Data, Reg);
1234  Dep.setLatency(0);
1235  ST.adjustSchedDependency(SU, &I, Dep);
1236  I.addPred(Dep);
1237  } else {
1238  HasPhiUse = Reg;
1239  // Add a chain edge to a dependent Phi that isn't an existing
1240  // predecessor.
1241  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1242  I.addPred(SDep(SU, SDep::Barrier));
1243  }
1244  }
1245  }
1246  }
1247  // Remove order dependences from an unrelated Phi.
1248  if (!SwpPruneDeps)
1249  continue;
1250  for (auto &PI : I.Preds) {
1251  MachineInstr *PMI = PI.getSUnit()->getInstr();
1252  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1253  if (I.getInstr()->isPHI()) {
1254  if (PMI->getOperand(0).getReg() == HasPhiUse)
1255  continue;
1256  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1257  continue;
1258  }
1259  RemoveDeps.push_back(PI);
1260  }
1261  }
1262  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1263  I.removePred(RemoveDeps[i]);
1264  }
1265 }
1266 
1267 /// Iterate over each DAG node and see if we can change any dependences
1268 /// in order to reduce the recurrence MII.
1269 void SwingSchedulerDAG::changeDependences() {
1270  // See if an instruction can use a value from the previous iteration.
1271  // If so, we update the base and offset of the instruction and change
1272  // the dependences.
1273  for (SUnit &I : SUnits) {
1274  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1275  int64_t NewOffset = 0;
1276  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1277  NewOffset))
1278  continue;
1279 
1280  // Get the MI and SUnit for the instruction that defines the original base.
1281  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1282  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1283  if (!DefMI)
1284  continue;
1285  SUnit *DefSU = getSUnit(DefMI);
1286  if (!DefSU)
1287  continue;
1288  // Get the MI and SUnit for the instruction that defins the new base.
1289  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1290  if (!LastMI)
1291  continue;
1292  SUnit *LastSU = getSUnit(LastMI);
1293  if (!LastSU)
1294  continue;
1295 
1296  if (Topo.IsReachable(&I, LastSU))
1297  continue;
1298 
1299  // Remove the dependence. The value now depends on a prior iteration.
1300  SmallVector<SDep, 4> Deps;
1301  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1302  ++P)
1303  if (P->getSUnit() == DefSU)
1304  Deps.push_back(*P);
1305  for (int i = 0, e = Deps.size(); i != e; i++) {
1306  Topo.RemovePred(&I, Deps[i].getSUnit());
1307  I.removePred(Deps[i]);
1308  }
1309  // Remove the chain dependence between the instructions.
1310  Deps.clear();
1311  for (auto &P : LastSU->Preds)
1312  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1313  Deps.push_back(P);
1314  for (int i = 0, e = Deps.size(); i != e; i++) {
1315  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1316  LastSU->removePred(Deps[i]);
1317  }
1318 
1319  // Add a dependence between the new instruction and the instruction
1320  // that defines the new base.
1321  SDep Dep(&I, SDep::Anti, NewBase);
1322  Topo.AddPred(LastSU, &I);
1323  LastSU->addPred(Dep);
1324 
1325  // Remember the base and offset information so that we can update the
1326  // instruction during code generation.
1327  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1328  }
1329 }
1330 
1331 namespace {
1332 
1333 // FuncUnitSorter - Comparison operator used to sort instructions by
1334 // the number of functional unit choices.
1335 struct FuncUnitSorter {
1336  const InstrItineraryData *InstrItins;
1337  DenseMap<unsigned, unsigned> Resources;
1338 
1339  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1340 
1341  // Compute the number of functional unit alternatives needed
1342  // at each stage, and take the minimum value. We prioritize the
1343  // instructions by the least number of choices first.
1344  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1345  unsigned schedClass = Inst->getDesc().getSchedClass();
1346  unsigned min = UINT_MAX;
1347  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1348  *IE = InstrItins->endStage(schedClass);
1349  IS != IE; ++IS) {
1350  unsigned funcUnits = IS->getUnits();
1351  unsigned numAlternatives = countPopulation(funcUnits);
1352  if (numAlternatives < min) {
1353  min = numAlternatives;
1354  F = funcUnits;
1355  }
1356  }
1357  return min;
1358  }
1359 
1360  // Compute the critical resources needed by the instruction. This
1361  // function records the functional units needed by instructions that
1362  // must use only one functional unit. We use this as a tie breaker
1363  // for computing the resource MII. The instrutions that require
1364  // the same, highly used, functional unit have high priority.
1365  void calcCriticalResources(MachineInstr &MI) {
1366  unsigned SchedClass = MI.getDesc().getSchedClass();
1367  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1368  *IE = InstrItins->endStage(SchedClass);
1369  IS != IE; ++IS) {
1370  unsigned FuncUnits = IS->getUnits();
1371  if (countPopulation(FuncUnits) == 1)
1372  Resources[FuncUnits]++;
1373  }
1374  }
1375 
1376  /// Return true if IS1 has less priority than IS2.
1377  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1378  unsigned F1 = 0, F2 = 0;
1379  unsigned MFUs1 = minFuncUnits(IS1, F1);
1380  unsigned MFUs2 = minFuncUnits(IS2, F2);
1381  if (MFUs1 == 1 && MFUs2 == 1)
1382  return Resources.lookup(F1) < Resources.lookup(F2);
1383  return MFUs1 > MFUs2;
1384  }
1385 };
1386 
1387 } // end anonymous namespace
1388 
1389 /// Calculate the resource constrained minimum initiation interval for the
1390 /// specified loop. We use the DFA to model the resources needed for
1391 /// each instruction, and we ignore dependences. A different DFA is created
1392 /// for each cycle that is required. When adding a new instruction, we attempt
1393 /// to add it to each existing DFA, until a legal space is found. If the
1394 /// instruction cannot be reserved in an existing DFA, we create a new one.
1395 unsigned SwingSchedulerDAG::calculateResMII() {
1397  MachineBasicBlock *MBB = Loop.getHeader();
1398  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1399 
1400  // Sort the instructions by the number of available choices for scheduling,
1401  // least to most. Use the number of critical resources as the tie breaker.
1402  FuncUnitSorter FUS =
1403  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1405  E = MBB->getFirstTerminator();
1406  I != E; ++I)
1407  FUS.calcCriticalResources(*I);
1409  FuncUnitOrder(FUS);
1410 
1412  E = MBB->getFirstTerminator();
1413  I != E; ++I)
1414  FuncUnitOrder.push(&*I);
1415 
1416  while (!FuncUnitOrder.empty()) {
1417  MachineInstr *MI = FuncUnitOrder.top();
1418  FuncUnitOrder.pop();
1419  if (TII->isZeroCost(MI->getOpcode()))
1420  continue;
1421  // Attempt to reserve the instruction in an existing DFA. At least one
1422  // DFA is needed for each cycle.
1423  unsigned NumCycles = getSUnit(MI)->Latency;
1424  unsigned ReservedCycles = 0;
1427  for (unsigned C = 0; C < NumCycles; ++C)
1428  while (RI != RE) {
1429  if ((*RI++)->canReserveResources(*MI)) {
1430  ++ReservedCycles;
1431  break;
1432  }
1433  }
1434  // Start reserving resources using existing DFAs.
1435  for (unsigned C = 0; C < ReservedCycles; ++C) {
1436  --RI;
1437  (*RI)->reserveResources(*MI);
1438  }
1439  // Add new DFAs, if needed, to reserve resources.
1440  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1441  DFAPacketizer *NewResource =
1443  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1444  NewResource->reserveResources(*MI);
1445  Resources.push_back(NewResource);
1446  }
1447  }
1448  int Resmii = Resources.size();
1449  // Delete the memory for each of the DFAs that were created earlier.
1450  for (DFAPacketizer *RI : Resources) {
1451  DFAPacketizer *D = RI;
1452  delete D;
1453  }
1454  Resources.clear();
1455  return Resmii;
1456 }
1457 
1458 /// Calculate the recurrence-constrainted minimum initiation interval.
1459 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1460 /// for each circuit. The II needs to satisfy the inequality
1461 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1462 /// II that satisfies the inequality, and the RecMII is the maximum
1463 /// of those values.
1464 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1465  unsigned RecMII = 0;
1466 
1467  for (NodeSet &Nodes : NodeSets) {
1468  if (Nodes.empty())
1469  continue;
1470 
1471  unsigned Delay = Nodes.getLatency();
1472  unsigned Distance = 1;
1473 
1474  // ii = ceil(delay / distance)
1475  unsigned CurMII = (Delay + Distance - 1) / Distance;
1476  Nodes.setRecMII(CurMII);
1477  if (CurMII > RecMII)
1478  RecMII = CurMII;
1479  }
1480 
1481  return RecMII;
1482 }
1483 
1484 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1485 /// but we do this to find the circuits, and then change them back.
1486 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1488  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1489  SUnit *SU = &SUnits[i];
1490  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1491  IP != EP; ++IP) {
1492  if (IP->getKind() != SDep::Anti)
1493  continue;
1494  DepsAdded.push_back(std::make_pair(SU, *IP));
1495  }
1496  }
1497  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1498  E = DepsAdded.end();
1499  I != E; ++I) {
1500  // Remove this anti dependency and add one in the reverse direction.
1501  SUnit *SU = I->first;
1502  SDep &D = I->second;
1503  SUnit *TargetSU = D.getSUnit();
1504  unsigned Reg = D.getReg();
1505  unsigned Lat = D.getLatency();
1506  SU->removePred(D);
1507  SDep Dep(SU, SDep::Anti, Reg);
1508  Dep.setLatency(Lat);
1509  TargetSU->addPred(Dep);
1510  }
1511 }
1512 
1513 /// Create the adjacency structure of the nodes in the graph.
1514 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1515  SwingSchedulerDAG *DAG) {
1516  BitVector Added(SUnits.size());
1517  DenseMap<int, int> OutputDeps;
1518  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1519  Added.reset();
1520  // Add any successor to the adjacency matrix and exclude duplicates.
1521  for (auto &SI : SUnits[i].Succs) {
1522  // Only create a back-edge on the first and last nodes of a dependence
1523  // chain. This records any chains and adds them later.
1524  if (SI.getKind() == SDep::Output) {
1525  int N = SI.getSUnit()->NodeNum;
1526  int BackEdge = i;
1527  auto Dep = OutputDeps.find(BackEdge);
1528  if (Dep != OutputDeps.end()) {
1529  BackEdge = Dep->second;
1530  OutputDeps.erase(Dep);
1531  }
1532  OutputDeps[N] = BackEdge;
1533  }
1534  // Do not process a boundary node, an artificial node.
1535  // A back-edge is processed only if it goes to a Phi.
1536  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1537  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1538  continue;
1539  int N = SI.getSUnit()->NodeNum;
1540  if (!Added.test(N)) {
1541  AdjK[i].push_back(N);
1542  Added.set(N);
1543  }
1544  }
1545  // A chain edge between a store and a load is treated as a back-edge in the
1546  // adjacency matrix.
1547  for (auto &PI : SUnits[i].Preds) {
1548  if (!SUnits[i].getInstr()->mayStore() ||
1549  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1550  continue;
1551  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1552  int N = PI.getSUnit()->NodeNum;
1553  if (!Added.test(N)) {
1554  AdjK[i].push_back(N);
1555  Added.set(N);
1556  }
1557  }
1558  }
1559  }
1560  // Add back-eges in the adjacency matrix for the output dependences.
1561  for (auto &OD : OutputDeps)
1562  if (!Added.test(OD.second)) {
1563  AdjK[OD.first].push_back(OD.second);
1564  Added.set(OD.second);
1565  }
1566 }
1567 
1568 /// Identify an elementary circuit in the dependence graph starting at the
1569 /// specified node.
1570 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1571  bool HasBackedge) {
1572  SUnit *SV = &SUnits[V];
1573  bool F = false;
1574  Stack.insert(SV);
1575  Blocked.set(V);
1576 
1577  for (auto W : AdjK[V]) {
1578  if (NumPaths > MaxPaths)
1579  break;
1580  if (W < S)
1581  continue;
1582  if (W == S) {
1583  if (!HasBackedge)
1584  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1585  F = true;
1586  ++NumPaths;
1587  break;
1588  } else if (!Blocked.test(W)) {
1589  if (circuit(W, S, NodeSets,
1590  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1591  F = true;
1592  }
1593  }
1594 
1595  if (F)
1596  unblock(V);
1597  else {
1598  for (auto W : AdjK[V]) {
1599  if (W < S)
1600  continue;
1601  if (B[W].count(SV) == 0)
1602  B[W].insert(SV);
1603  }
1604  }
1605  Stack.pop_back();
1606  return F;
1607 }
1608 
1609 /// Unblock a node in the circuit finding algorithm.
1610 void SwingSchedulerDAG::Circuits::unblock(int U) {
1611  Blocked.reset(U);
1612  SmallPtrSet<SUnit *, 4> &BU = B[U];
1613  while (!BU.empty()) {
1615  assert(SI != BU.end() && "Invalid B set.");
1616  SUnit *W = *SI;
1617  BU.erase(W);
1618  if (Blocked.test(W->NodeNum))
1619  unblock(W->NodeNum);
1620  }
1621 }
1622 
1623 /// Identify all the elementary circuits in the dependence graph using
1624 /// Johnson's circuit algorithm.
1625 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1626  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1627  // but we do this to find the circuits, and then change them back.
1628  swapAntiDependences(SUnits);
1629 
1630  Circuits Cir(SUnits, Topo);
1631  // Create the adjacency structure.
1632  Cir.createAdjacencyStructure(this);
1633  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1634  Cir.reset();
1635  Cir.circuit(i, i, NodeSets);
1636  }
1637 
1638  // Change the dependences back so that we've created a DAG again.
1639  swapAntiDependences(SUnits);
1640 }
1641 
1642 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1643 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1644 // additional copies that are needed across iterations. An artificial dependence
1645 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1646 
1647 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1648 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1649 // PHI-------True-Dep------> USEOfPhi
1650 
1651 // The mutation creates
1652 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1653 
1654 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1655 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1656 // late to avoid additional copies across iterations. The possible scheduling
1657 // order would be
1658 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1659 
1661  for (SUnit &SU : DAG->SUnits) {
1662  // Find the COPY/REG_SEQUENCE instruction.
1663  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1664  continue;
1665 
1666  // Record the loop carried PHIs.
1667  SmallVector<SUnit *, 4> PHISUs;
1668  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1669  SmallVector<SUnit *, 4> SrcSUs;
1670 
1671  for (auto &Dep : SU.Preds) {
1672  SUnit *TmpSU = Dep.getSUnit();
1673  MachineInstr *TmpMI = TmpSU->getInstr();
1674  SDep::Kind DepKind = Dep.getKind();
1675  // Save the loop carried PHI.
1676  if (DepKind == SDep::Anti && TmpMI->isPHI())
1677  PHISUs.push_back(TmpSU);
1678  // Save the source of COPY/REG_SEQUENCE.
1679  // If the source has no pre-decessors, we will end up creating cycles.
1680  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1681  SrcSUs.push_back(TmpSU);
1682  }
1683 
1684  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1685  continue;
1686 
1687  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1688  // SUnit to the container.
1689  SmallVector<SUnit *, 8> UseSUs;
1690  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1691  for (auto &Dep : (*I)->Succs) {
1692  if (Dep.getKind() != SDep::Data)
1693  continue;
1694 
1695  SUnit *TmpSU = Dep.getSUnit();
1696  MachineInstr *TmpMI = TmpSU->getInstr();
1697  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1698  PHISUs.push_back(TmpSU);
1699  continue;
1700  }
1701  UseSUs.push_back(TmpSU);
1702  }
1703  }
1704 
1705  if (UseSUs.size() == 0)
1706  continue;
1707 
1708  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1709  // Add the artificial dependencies if it does not form a cycle.
1710  for (auto I : UseSUs) {
1711  for (auto Src : SrcSUs) {
1712  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1713  Src->addPred(SDep(I, SDep::Artificial));
1714  SDAG->Topo.AddPred(Src, I);
1715  }
1716  }
1717  }
1718  }
1719 }
1720 
1721 /// Return true for DAG nodes that we ignore when computing the cost functions.
1722 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1723 /// in the calculation of the ASAP, ALAP, etc functions.
1724 static bool ignoreDependence(const SDep &D, bool isPred) {
1725  if (D.isArtificial())
1726  return true;
1727  return D.getKind() == SDep::Anti && isPred;
1728 }
1729 
1730 /// Compute several functions need to order the nodes for scheduling.
1731 /// ASAP - Earliest time to schedule a node.
1732 /// ALAP - Latest time to schedule a node.
1733 /// MOV - Mobility function, difference between ALAP and ASAP.
1734 /// D - Depth of each node.
1735 /// H - Height of each node.
1736 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1737  ScheduleInfo.resize(SUnits.size());
1738 
1739  LLVM_DEBUG({
1740  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1741  E = Topo.end();
1742  I != E; ++I) {
1743  const SUnit &SU = SUnits[*I];
1744  dumpNode(SU);
1745  }
1746  });
1747 
1748  int maxASAP = 0;
1749  // Compute ASAP and ZeroLatencyDepth.
1750  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1751  E = Topo.end();
1752  I != E; ++I) {
1753  int asap = 0;
1754  int zeroLatencyDepth = 0;
1755  SUnit *SU = &SUnits[*I];
1756  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1757  EP = SU->Preds.end();
1758  IP != EP; ++IP) {
1759  SUnit *pred = IP->getSUnit();
1760  if (IP->getLatency() == 0)
1761  zeroLatencyDepth =
1762  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1763  if (ignoreDependence(*IP, true))
1764  continue;
1765  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1766  getDistance(pred, SU, *IP) * MII));
1767  }
1768  maxASAP = std::max(maxASAP, asap);
1769  ScheduleInfo[*I].ASAP = asap;
1770  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1771  }
1772 
1773  // Compute ALAP, ZeroLatencyHeight, and MOV.
1775  E = Topo.rend();
1776  I != E; ++I) {
1777  int alap = maxASAP;
1778  int zeroLatencyHeight = 0;
1779  SUnit *SU = &SUnits[*I];
1780  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1781  ES = SU->Succs.end();
1782  IS != ES; ++IS) {
1783  SUnit *succ = IS->getSUnit();
1784  if (IS->getLatency() == 0)
1785  zeroLatencyHeight =
1786  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1787  if (ignoreDependence(*IS, true))
1788  continue;
1789  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1790  getDistance(SU, succ, *IS) * MII));
1791  }
1792 
1793  ScheduleInfo[*I].ALAP = alap;
1794  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1795  }
1796 
1797  // After computing the node functions, compute the summary for each node set.
1798  for (NodeSet &I : NodeSets)
1799  I.computeNodeSetInfo(this);
1800 
1801  LLVM_DEBUG({
1802  for (unsigned i = 0; i < SUnits.size(); i++) {
1803  dbgs() << "\tNode " << i << ":\n";
1804  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1805  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1806  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1807  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1808  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1809  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1810  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1811  }
1812  });
1813 }
1814 
1815 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1816 /// as the predecessors of the elements of NodeOrder that are not also in
1817 /// NodeOrder.
1818 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1820  const NodeSet *S = nullptr) {
1821  Preds.clear();
1822  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1823  I != E; ++I) {
1824  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1825  PI != PE; ++PI) {
1826  if (S && S->count(PI->getSUnit()) == 0)
1827  continue;
1828  if (ignoreDependence(*PI, true))
1829  continue;
1830  if (NodeOrder.count(PI->getSUnit()) == 0)
1831  Preds.insert(PI->getSUnit());
1832  }
1833  // Back-edges are predecessors with an anti-dependence.
1834  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1835  ES = (*I)->Succs.end();
1836  IS != ES; ++IS) {
1837  if (IS->getKind() != SDep::Anti)
1838  continue;
1839  if (S && S->count(IS->getSUnit()) == 0)
1840  continue;
1841  if (NodeOrder.count(IS->getSUnit()) == 0)
1842  Preds.insert(IS->getSUnit());
1843  }
1844  }
1845  return !Preds.empty();
1846 }
1847 
1848 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1849 /// as the successors of the elements of NodeOrder that are not also in
1850 /// NodeOrder.
1851 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1853  const NodeSet *S = nullptr) {
1854  Succs.clear();
1855  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1856  I != E; ++I) {
1857  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1858  SI != SE; ++SI) {
1859  if (S && S->count(SI->getSUnit()) == 0)
1860  continue;
1861  if (ignoreDependence(*SI, false))
1862  continue;
1863  if (NodeOrder.count(SI->getSUnit()) == 0)
1864  Succs.insert(SI->getSUnit());
1865  }
1866  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1867  PE = (*I)->Preds.end();
1868  PI != PE; ++PI) {
1869  if (PI->getKind() != SDep::Anti)
1870  continue;
1871  if (S && S->count(PI->getSUnit()) == 0)
1872  continue;
1873  if (NodeOrder.count(PI->getSUnit()) == 0)
1874  Succs.insert(PI->getSUnit());
1875  }
1876  }
1877  return !Succs.empty();
1878 }
1879 
1880 /// Return true if there is a path from the specified node to any of the nodes
1881 /// in DestNodes. Keep track and return the nodes in any path.
1882 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1883  SetVector<SUnit *> &DestNodes,
1884  SetVector<SUnit *> &Exclude,
1885  SmallPtrSet<SUnit *, 8> &Visited) {
1886  if (Cur->isBoundaryNode())
1887  return false;
1888  if (Exclude.count(Cur) != 0)
1889  return false;
1890  if (DestNodes.count(Cur) != 0)
1891  return true;
1892  if (!Visited.insert(Cur).second)
1893  return Path.count(Cur) != 0;
1894  bool FoundPath = false;
1895  for (auto &SI : Cur->Succs)
1896  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1897  for (auto &PI : Cur->Preds)
1898  if (PI.getKind() == SDep::Anti)
1899  FoundPath |=
1900  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1901  if (FoundPath)
1902  Path.insert(Cur);
1903  return FoundPath;
1904 }
1905 
1906 /// Return true if Set1 is a subset of Set2.
1907 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1908  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1909  if (Set2.count(*I) == 0)
1910  return false;
1911  return true;
1912 }
1913 
1914 /// Compute the live-out registers for the instructions in a node-set.
1915 /// The live-out registers are those that are defined in the node-set,
1916 /// but not used. Except for use operands of Phis.
1918  NodeSet &NS) {
1920  MachineRegisterInfo &MRI = MF.getRegInfo();
1922  SmallSet<unsigned, 4> Uses;
1923  for (SUnit *SU : NS) {
1924  const MachineInstr *MI = SU->getInstr();
1925  if (MI->isPHI())
1926  continue;
1927  for (const MachineOperand &MO : MI->operands())
1928  if (MO.isReg() && MO.isUse()) {
1929  unsigned Reg = MO.getReg();
1931  Uses.insert(Reg);
1932  else if (MRI.isAllocatable(Reg))
1933  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1934  Uses.insert(*Units);
1935  }
1936  }
1937  for (SUnit *SU : NS)
1938  for (const MachineOperand &MO : SU->getInstr()->operands())
1939  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1940  unsigned Reg = MO.getReg();
1942  if (!Uses.count(Reg))
1943  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1945  } else if (MRI.isAllocatable(Reg)) {
1946  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1947  if (!Uses.count(*Units))
1948  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1950  }
1951  }
1952  RPTracker.addLiveRegs(LiveOutRegs);
1953 }
1954 
1955 /// A heuristic to filter nodes in recurrent node-sets if the register
1956 /// pressure of a set is too high.
1957 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1958  for (auto &NS : NodeSets) {
1959  // Skip small node-sets since they won't cause register pressure problems.
1960  if (NS.size() <= 2)
1961  continue;
1962  IntervalPressure RecRegPressure;
1963  RegPressureTracker RecRPTracker(RecRegPressure);
1964  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1965  computeLiveOuts(MF, RecRPTracker, NS);
1966  RecRPTracker.closeBottom();
1967 
1968  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1969  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1970  return A->NodeNum > B->NodeNum;
1971  });
1972 
1973  for (auto &SU : SUnits) {
1974  // Since we're computing the register pressure for a subset of the
1975  // instructions in a block, we need to set the tracker for each
1976  // instruction in the node-set. The tracker is set to the instruction
1977  // just after the one we're interested in.
1979  RecRPTracker.setPos(std::next(CurInstI));
1980 
1981  RegPressureDelta RPDelta;
1982  ArrayRef<PressureChange> CriticalPSets;
1983  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1984  CriticalPSets,
1985  RecRegPressure.MaxSetPressure);
1986  if (RPDelta.Excess.isValid()) {
1987  LLVM_DEBUG(
1988  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1989  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1990  << ":" << RPDelta.Excess.getUnitInc());
1991  NS.setExceedPressure(SU);
1992  break;
1993  }
1994  RecRPTracker.recede();
1995  }
1996  }
1997 }
1998 
1999 /// A heuristic to colocate node sets that have the same set of
2000 /// successors.
2001 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2002  unsigned Colocate = 0;
2003  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
2004  NodeSet &N1 = NodeSets[i];
2006  if (N1.empty() || !succ_L(N1, S1))
2007  continue;
2008  for (int j = i + 1; j < e; ++j) {
2009  NodeSet &N2 = NodeSets[j];
2010  if (N1.compareRecMII(N2) != 0)
2011  continue;
2013  if (N2.empty() || !succ_L(N2, S2))
2014  continue;
2015  if (isSubset(S1, S2) && S1.size() == S2.size()) {
2016  N1.setColocate(++Colocate);
2017  N2.setColocate(Colocate);
2018  break;
2019  }
2020  }
2021  }
2022 }
2023 
2024 /// Check if the existing node-sets are profitable. If not, then ignore the
2025 /// recurrent node-sets, and attempt to schedule all nodes together. This is
2026 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
2027 /// then it's best to try to schedule all instructions together instead of
2028 /// starting with the recurrent node-sets.
2029 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2030  // Look for loops with a large MII.
2031  if (MII < 17)
2032  return;
2033  // Check if the node-set contains only a simple add recurrence.
2034  for (auto &NS : NodeSets) {
2035  if (NS.getRecMII() > 2)
2036  return;
2037  if (NS.getMaxDepth() > MII)
2038  return;
2039  }
2040  NodeSets.clear();
2041  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2042  return;
2043 }
2044 
2045 /// Add the nodes that do not belong to a recurrence set into groups
2046 /// based upon connected componenets.
2047 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2048  SetVector<SUnit *> NodesAdded;
2049  SmallPtrSet<SUnit *, 8> Visited;
2050  // Add the nodes that are on a path between the previous node sets and
2051  // the current node set.
2052  for (NodeSet &I : NodeSets) {
2054  // Add the nodes from the current node set to the previous node set.
2055  if (succ_L(I, N)) {
2056  SetVector<SUnit *> Path;
2057  for (SUnit *NI : N) {
2058  Visited.clear();
2059  computePath(NI, Path, NodesAdded, I, Visited);
2060  }
2061  if (!Path.empty())
2062  I.insert(Path.begin(), Path.end());
2063  }
2064  // Add the nodes from the previous node set to the current node set.
2065  N.clear();
2066  if (succ_L(NodesAdded, N)) {
2067  SetVector<SUnit *> Path;
2068  for (SUnit *NI : N) {
2069  Visited.clear();
2070  computePath(NI, Path, I, NodesAdded, Visited);
2071  }
2072  if (!Path.empty())
2073  I.insert(Path.begin(), Path.end());
2074  }
2075  NodesAdded.insert(I.begin(), I.end());
2076  }
2077 
2078  // Create a new node set with the connected nodes of any successor of a node
2079  // in a recurrent set.
2080  NodeSet NewSet;
2082  if (succ_L(NodesAdded, N))
2083  for (SUnit *I : N)
2084  addConnectedNodes(I, NewSet, NodesAdded);
2085  if (!NewSet.empty())
2086  NodeSets.push_back(NewSet);
2087 
2088  // Create a new node set with the connected nodes of any predecessor of a node
2089  // in a recurrent set.
2090  NewSet.clear();
2091  if (pred_L(NodesAdded, N))
2092  for (SUnit *I : N)
2093  addConnectedNodes(I, NewSet, NodesAdded);
2094  if (!NewSet.empty())
2095  NodeSets.push_back(NewSet);
2096 
2097  // Create new nodes sets with the connected nodes any remaining node that
2098  // has no predecessor.
2099  for (unsigned i = 0; i < SUnits.size(); ++i) {
2100  SUnit *SU = &SUnits[i];
2101  if (NodesAdded.count(SU) == 0) {
2102  NewSet.clear();
2103  addConnectedNodes(SU, NewSet, NodesAdded);
2104  if (!NewSet.empty())
2105  NodeSets.push_back(NewSet);
2106  }
2107  }
2108 }
2109 
2110 /// Add the node to the set, and add all is its connected nodes to the set.
2111 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2112  SetVector<SUnit *> &NodesAdded) {
2113  NewSet.insert(SU);
2114  NodesAdded.insert(SU);
2115  for (auto &SI : SU->Succs) {
2116  SUnit *Successor = SI.getSUnit();
2117  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
2118  addConnectedNodes(Successor, NewSet, NodesAdded);
2119  }
2120  for (auto &PI : SU->Preds) {
2121  SUnit *Predecessor = PI.getSUnit();
2122  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
2123  addConnectedNodes(Predecessor, NewSet, NodesAdded);
2124  }
2125 }
2126 
2127 /// Return true if Set1 contains elements in Set2. The elements in common
2128 /// are returned in a different container.
2129 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
2130  SmallSetVector<SUnit *, 8> &Result) {
2131  Result.clear();
2132  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
2133  SUnit *SU = Set1[i];
2134  if (Set2.count(SU) != 0)
2135  Result.insert(SU);
2136  }
2137  return !Result.empty();
2138 }
2139 
2140 /// Merge the recurrence node sets that have the same initial node.
2141 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2142  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2143  ++I) {
2144  NodeSet &NI = *I;
2145  for (NodeSetType::iterator J = I + 1; J != E;) {
2146  NodeSet &NJ = *J;
2147  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
2148  if (NJ.compareRecMII(NI) > 0)
2149  NI.setRecMII(NJ.getRecMII());
2150  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
2151  ++NII)
2152  I->insert(*NII);
2153  NodeSets.erase(J);
2154  E = NodeSets.end();
2155  } else {
2156  ++J;
2157  }
2158  }
2159  }
2160 }
2161 
2162 /// Remove nodes that have been scheduled in previous NodeSets.
2163 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2164  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2165  ++I)
2166  for (NodeSetType::iterator J = I + 1; J != E;) {
2167  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2168 
2169  if (J->empty()) {
2170  NodeSets.erase(J);
2171  E = NodeSets.end();
2172  } else {
2173  ++J;
2174  }
2175  }
2176 }
2177 
2178 /// Compute an ordered list of the dependence graph nodes, which
2179 /// indicates the order that the nodes will be scheduled. This is a
2180 /// two-level algorithm. First, a partial order is created, which
2181 /// consists of a list of sets ordered from highest to lowest priority.
2182 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2184  NodeOrder.clear();
2185 
2186  for (auto &Nodes : NodeSets) {
2187  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2188  OrderKind Order;
2190  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2191  R.insert(N.begin(), N.end());
2192  Order = BottomUp;
2193  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
2194  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2195  R.insert(N.begin(), N.end());
2196  Order = TopDown;
2197  LLVM_DEBUG(dbgs() << " Top down (succs) ");
2198  } else if (isIntersect(N, Nodes, R)) {
2199  // If some of the successors are in the existing node-set, then use the
2200  // top-down ordering.
2201  Order = TopDown;
2202  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
2203  } else if (NodeSets.size() == 1) {
2204  for (auto &N : Nodes)
2205  if (N->Succs.size() == 0)
2206  R.insert(N);
2207  Order = BottomUp;
2208  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
2209  } else {
2210  // Find the node with the highest ASAP.
2211  SUnit *maxASAP = nullptr;
2212  for (SUnit *SU : Nodes) {
2213  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
2214  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2215  maxASAP = SU;
2216  }
2217  R.insert(maxASAP);
2218  Order = BottomUp;
2219  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
2220  }
2221 
2222  while (!R.empty()) {
2223  if (Order == TopDown) {
2224  // Choose the node with the maximum height. If more than one, choose
2225  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
2226  // choose the node with the lowest MOV.
2227  while (!R.empty()) {
2228  SUnit *maxHeight = nullptr;
2229  for (SUnit *I : R) {
2230  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2231  maxHeight = I;
2232  else if (getHeight(I) == getHeight(maxHeight) &&
2233  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
2234  maxHeight = I;
2235  else if (getHeight(I) == getHeight(maxHeight) &&
2236  getZeroLatencyHeight(I) ==
2237  getZeroLatencyHeight(maxHeight) &&
2238  getMOV(I) < getMOV(maxHeight))
2239  maxHeight = I;
2240  }
2241  NodeOrder.insert(maxHeight);
2242  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
2243  R.remove(maxHeight);
2244  for (const auto &I : maxHeight->Succs) {
2245  if (Nodes.count(I.getSUnit()) == 0)
2246  continue;
2247  if (NodeOrder.count(I.getSUnit()) != 0)
2248  continue;
2249  if (ignoreDependence(I, false))
2250  continue;
2251  R.insert(I.getSUnit());
2252  }
2253  // Back-edges are predecessors with an anti-dependence.
2254  for (const auto &I : maxHeight->Preds) {
2255  if (I.getKind() != SDep::Anti)
2256  continue;
2257  if (Nodes.count(I.getSUnit()) == 0)
2258  continue;
2259  if (NodeOrder.count(I.getSUnit()) != 0)
2260  continue;
2261  R.insert(I.getSUnit());
2262  }
2263  }
2264  Order = BottomUp;
2265  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2267  if (pred_L(NodeOrder, N, &Nodes))
2268  R.insert(N.begin(), N.end());
2269  } else {
2270  // Choose the node with the maximum depth. If more than one, choose
2271  // the node with the maximum ZeroLatencyDepth. If still more than one,
2272  // choose the node with the lowest MOV.
2273  while (!R.empty()) {
2274  SUnit *maxDepth = nullptr;
2275  for (SUnit *I : R) {
2276  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2277  maxDepth = I;
2278  else if (getDepth(I) == getDepth(maxDepth) &&
2279  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
2280  maxDepth = I;
2281  else if (getDepth(I) == getDepth(maxDepth) &&
2282  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
2283  getMOV(I) < getMOV(maxDepth))
2284  maxDepth = I;
2285  }
2286  NodeOrder.insert(maxDepth);
2287  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
2288  R.remove(maxDepth);
2289  if (Nodes.isExceedSU(maxDepth)) {
2290  Order = TopDown;
2291  R.clear();
2292  R.insert(Nodes.getNode(0));
2293  break;
2294  }
2295  for (const auto &I : maxDepth->Preds) {
2296  if (Nodes.count(I.getSUnit()) == 0)
2297  continue;
2298  if (NodeOrder.count(I.getSUnit()) != 0)
2299  continue;
2300  R.insert(I.getSUnit());
2301  }
2302  // Back-edges are predecessors with an anti-dependence.
2303  for (const auto &I : maxDepth->Succs) {
2304  if (I.getKind() != SDep::Anti)
2305  continue;
2306  if (Nodes.count(I.getSUnit()) == 0)
2307  continue;
2308  if (NodeOrder.count(I.getSUnit()) != 0)
2309  continue;
2310  R.insert(I.getSUnit());
2311  }
2312  }
2313  Order = TopDown;
2314  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2316  if (succ_L(NodeOrder, N, &Nodes))
2317  R.insert(N.begin(), N.end());
2318  }
2319  }
2320  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2321  }
2322 
2323  LLVM_DEBUG({
2324  dbgs() << "Node order: ";
2325  for (SUnit *I : NodeOrder)
2326  dbgs() << " " << I->NodeNum << " ";
2327  dbgs() << "\n";
2328  });
2329 }
2330 
2331 /// Process the nodes in the computed order and create the pipelined schedule
2332 /// of the instructions, if possible. Return true if a schedule is found.
2333 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2334  if (NodeOrder.empty())
2335  return false;
2336 
2337  bool scheduleFound = false;
2338  // Keep increasing II until a valid schedule is found.
2339  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2340  Schedule.reset();
2341  Schedule.setInitiationInterval(II);
2342  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2343 
2344  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2345  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2346  do {
2347  SUnit *SU = *NI;
2348 
2349  // Compute the schedule time for the instruction, which is based
2350  // upon the scheduled time for any predecessors/successors.
2351  int EarlyStart = INT_MIN;
2352  int LateStart = INT_MAX;
2353  // These values are set when the size of the schedule window is limited
2354  // due to chain dependences.
2355  int SchedEnd = INT_MAX;
2356  int SchedStart = INT_MIN;
2357  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2358  II, this);
2359  LLVM_DEBUG({
2360  dbgs() << "Inst (" << SU->NodeNum << ") ";
2361  SU->getInstr()->dump();
2362  dbgs() << "\n";
2363  });
2364  LLVM_DEBUG({
2365  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2366  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2367  });
2368 
2369  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2370  SchedStart > LateStart)
2371  scheduleFound = false;
2372  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2373  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2374  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2375  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2376  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2377  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2378  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2379  SchedEnd =
2380  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2381  // When scheduling a Phi it is better to start at the late cycle and go
2382  // backwards. The default order may insert the Phi too far away from
2383  // its first dependence.
2384  if (SU->getInstr()->isPHI())
2385  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2386  else
2387  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2388  } else {
2389  int FirstCycle = Schedule.getFirstCycle();
2390  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2391  FirstCycle + getASAP(SU) + II - 1, II);
2392  }
2393  // Even if we find a schedule, make sure the schedule doesn't exceed the
2394  // allowable number of stages. We keep trying if this happens.
2395  if (scheduleFound)
2396  if (SwpMaxStages > -1 &&
2397  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2398  scheduleFound = false;
2399 
2400  LLVM_DEBUG({
2401  if (!scheduleFound)
2402  dbgs() << "\tCan't schedule\n";
2403  });
2404  } while (++NI != NE && scheduleFound);
2405 
2406  // If a schedule is found, check if it is a valid schedule too.
2407  if (scheduleFound)
2408  scheduleFound = Schedule.isValidSchedule(this);
2409  }
2410 
2411  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2412 
2413  if (scheduleFound)
2414  Schedule.finalizeSchedule(this);
2415  else
2416  Schedule.reset();
2417 
2418  return scheduleFound && Schedule.getMaxStageCount() > 0;
2419 }
2420 
2421 /// Given a schedule for the loop, generate a new version of the loop,
2422 /// and replace the old version. This function generates a prolog
2423 /// that contains the initial iterations in the pipeline, and kernel
2424 /// loop, and the epilogue that contains the code for the final
2425 /// iterations.
2426 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2427  // Create a new basic block for the kernel and add it to the CFG.
2428  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2429 
2430  unsigned MaxStageCount = Schedule.getMaxStageCount();
2431 
2432  // Remember the registers that are used in different stages. The index is
2433  // the iteration, or stage, that the instruction is scheduled in. This is
2434  // a map between register names in the original block and the names created
2435  // in each stage of the pipelined loop.
2436  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2437  InstrMapTy InstrMap;
2438 
2440  // Generate the prolog instructions that set up the pipeline.
2441  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2442  MF.insert(BB->getIterator(), KernelBB);
2443 
2444  // Rearrange the instructions to generate the new, pipelined loop,
2445  // and update register names as needed.
2446  for (int Cycle = Schedule.getFirstCycle(),
2447  LastCycle = Schedule.getFinalCycle();
2448  Cycle <= LastCycle; ++Cycle) {
2449  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2450  // This inner loop schedules each instruction in the cycle.
2451  for (SUnit *CI : CycleInstrs) {
2452  if (CI->getInstr()->isPHI())
2453  continue;
2454  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2455  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2456  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2457  KernelBB->push_back(NewMI);
2458  InstrMap[NewMI] = CI->getInstr();
2459  }
2460  }
2461 
2462  // Copy any terminator instructions to the new kernel, and update
2463  // names as needed.
2464  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2465  E = BB->instr_end();
2466  I != E; ++I) {
2467  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2468  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2469  KernelBB->push_back(NewMI);
2470  InstrMap[NewMI] = &*I;
2471  }
2472 
2473  KernelBB->transferSuccessors(BB);
2474  KernelBB->replaceSuccessor(BB, KernelBB);
2475 
2476  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2477  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2478  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2479  InstrMap, MaxStageCount, MaxStageCount, false);
2480 
2481  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2482 
2484  // Generate the epilog instructions to complete the pipeline.
2485  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2486  PrologBBs);
2487 
2488  // We need this step because the register allocation doesn't handle some
2489  // situations well, so we insert copies to help out.
2490  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2491 
2492  // Remove dead instructions due to loop induction variables.
2493  removeDeadInstructions(KernelBB, EpilogBBs);
2494 
2495  // Add branches between prolog and epilog blocks.
2496  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2497 
2498  // Remove the original loop since it's no longer referenced.
2499  for (auto &I : *BB)
2501  BB->clear();
2502  BB->eraseFromParent();
2503 
2504  delete[] VRMap;
2505 }
2506 
2507 /// Generate the pipeline prolog code.
2508 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2509  MachineBasicBlock *KernelBB,
2510  ValueMapTy *VRMap,
2511  MBBVectorTy &PrologBBs) {
2512  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2513  assert(PreheaderBB != nullptr &&
2514  "Need to add code to handle loops w/o preheader");
2515  MachineBasicBlock *PredBB = PreheaderBB;
2516  InstrMapTy InstrMap;
2517 
2518  // Generate a basic block for each stage, not including the last stage,
2519  // which will be generated in the kernel. Each basic block may contain
2520  // instructions from multiple stages/iterations.
2521  for (unsigned i = 0; i < LastStage; ++i) {
2522  // Create and insert the prolog basic block prior to the original loop
2523  // basic block. The original loop is removed later.
2524  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2525  PrologBBs.push_back(NewBB);
2526  MF.insert(BB->getIterator(), NewBB);
2527  NewBB->transferSuccessors(PredBB);
2528  PredBB->addSuccessor(NewBB);
2529  PredBB = NewBB;
2530 
2531  // Generate instructions for each appropriate stage. Process instructions
2532  // in original program order.
2533  for (int StageNum = i; StageNum >= 0; --StageNum) {
2534  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2535  BBE = BB->getFirstTerminator();
2536  BBI != BBE; ++BBI) {
2537  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2538  if (BBI->isPHI())
2539  continue;
2540  MachineInstr *NewMI =
2541  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2542  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2543  VRMap);
2544  NewBB->push_back(NewMI);
2545  InstrMap[NewMI] = &*BBI;
2546  }
2547  }
2548  }
2549  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2550  LLVM_DEBUG({
2551  dbgs() << "prolog:\n";
2552  NewBB->dump();
2553  });
2554  }
2555 
2556  PredBB->replaceSuccessor(BB, KernelBB);
2557 
2558  // Check if we need to remove the branch from the preheader to the original
2559  // loop, and replace it with a branch to the new loop.
2560  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2561  if (numBranches) {
2563  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2564  }
2565 }
2566 
2567 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2568 /// that were started in either the prolog or the kernel. We create a basic
2569 /// block for each stage that needs to complete.
2570 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2571  MachineBasicBlock *KernelBB,
2572  ValueMapTy *VRMap,
2573  MBBVectorTy &EpilogBBs,
2574  MBBVectorTy &PrologBBs) {
2575  // We need to change the branch from the kernel to the first epilog block, so
2576  // this call to analyze branch uses the kernel rather than the original BB.
2577  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2579  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2580  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2581  if (checkBranch)
2582  return;
2583 
2584  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2585  if (*LoopExitI == KernelBB)
2586  ++LoopExitI;
2587  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2588  MachineBasicBlock *LoopExitBB = *LoopExitI;
2589 
2590  MachineBasicBlock *PredBB = KernelBB;
2591  MachineBasicBlock *EpilogStart = LoopExitBB;
2592  InstrMapTy InstrMap;
2593 
2594  // Generate a basic block for each stage, not including the last stage,
2595  // which was generated for the kernel. Each basic block may contain
2596  // instructions from multiple stages/iterations.
2597  int EpilogStage = LastStage + 1;
2598  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2600  EpilogBBs.push_back(NewBB);
2601  MF.insert(BB->getIterator(), NewBB);
2602 
2603  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2604  NewBB->addSuccessor(LoopExitBB);
2605 
2606  if (EpilogStart == LoopExitBB)
2607  EpilogStart = NewBB;
2608 
2609  // Add instructions to the epilog depending on the current block.
2610  // Process instructions in original program order.
2611  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2612  for (auto &BBI : *BB) {
2613  if (BBI.isPHI())
2614  continue;
2615  MachineInstr *In = &BBI;
2616  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2617  // Instructions with memoperands in the epilog are updated with
2618  // conservative values.
2619  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2620  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2621  NewBB->push_back(NewMI);
2622  InstrMap[NewMI] = In;
2623  }
2624  }
2625  }
2626  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2627  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2628  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2629  InstrMap, LastStage, EpilogStage, i == 1);
2630  PredBB = NewBB;
2631 
2632  LLVM_DEBUG({
2633  dbgs() << "epilog:\n";
2634  NewBB->dump();
2635  });
2636  }
2637 
2638  // Fix any Phi nodes in the loop exit block.
2639  for (MachineInstr &MI : *LoopExitBB) {
2640  if (!MI.isPHI())
2641  break;
2642  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2643  MachineOperand &MO = MI.getOperand(i);
2644  if (MO.getMBB() == BB)
2645  MO.setMBB(PredBB);
2646  }
2647  }
2648 
2649  // Create a branch to the new epilog from the kernel.
2650  // Remove the original branch and add a new branch to the epilog.
2651  TII->removeBranch(*KernelBB);
2652  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2653  // Add a branch to the loop exit.
2654  if (EpilogBBs.size() > 0) {
2655  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2657  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2658  }
2659 }
2660 
2661 /// Replace all uses of FromReg that appear outside the specified
2662 /// basic block with ToReg.
2663 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2664  MachineBasicBlock *MBB,
2665  MachineRegisterInfo &MRI,
2666  LiveIntervals &LIS) {
2667  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2668  E = MRI.use_end();
2669  I != E;) {
2670  MachineOperand &O = *I;
2671  ++I;
2672  if (O.getParent()->getParent() != MBB)
2673  O.setReg(ToReg);
2674  }
2675  if (!LIS.hasInterval(ToReg))
2676  LIS.createEmptyInterval(ToReg);
2677 }
2678 
2679 /// Return true if the register has a use that occurs outside the
2680 /// specified loop.
2681 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2682  MachineRegisterInfo &MRI) {
2684  E = MRI.use_end();
2685  I != E; ++I)
2686  if (I->getParent()->getParent() != BB)
2687  return true;
2688  return false;
2689 }
2690 
2691 /// Generate Phis for the specific block in the generated pipelined code.
2692 /// This function looks at the Phis from the original code to guide the
2693 /// creation of new Phis.
2694 void SwingSchedulerDAG::generateExistingPhis(
2696  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2697  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2698  bool IsLast) {
2699  // Compute the stage number for the initial value of the Phi, which
2700  // comes from the prolog. The prolog to use depends on to which kernel/
2701  // epilog that we're adding the Phi.
2702  unsigned PrologStage = 0;
2703  unsigned PrevStage = 0;
2704  bool InKernel = (LastStageNum == CurStageNum);
2705  if (InKernel) {
2706  PrologStage = LastStageNum - 1;
2707  PrevStage = CurStageNum;
2708  } else {
2709  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2710  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2711  }
2712 
2713  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2714  BBE = BB->getFirstNonPHI();
2715  BBI != BBE; ++BBI) {
2716  unsigned Def = BBI->getOperand(0).getReg();
2717 
2718  unsigned InitVal = 0;
2719  unsigned LoopVal = 0;
2720  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2721 
2722  unsigned PhiOp1 = 0;
2723  // The Phi value from the loop body typically is defined in the loop, but
2724  // not always. So, we need to check if the value is defined in the loop.
2725  unsigned PhiOp2 = LoopVal;
2726  if (VRMap[LastStageNum].count(LoopVal))
2727  PhiOp2 = VRMap[LastStageNum][LoopVal];
2728 
2729  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2730  int LoopValStage =
2731  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2732  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2733  if (NumStages == 0) {
2734  // We don't need to generate a Phi anymore, but we need to rename any uses
2735  // of the Phi value.
2736  unsigned NewReg = VRMap[PrevStage][LoopVal];
2737  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2738  Def, InitVal, NewReg);
2739  if (VRMap[CurStageNum].count(LoopVal))
2740  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2741  }
2742  // Adjust the number of Phis needed depending on the number of prologs left,
2743  // and the distance from where the Phi is first scheduled. The number of
2744  // Phis cannot exceed the number of prolog stages. Each stage can
2745  // potentially define two values.
2746  unsigned MaxPhis = PrologStage + 2;
2747  if (!InKernel && (int)PrologStage <= LoopValStage)
2748  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2749  unsigned NumPhis = std::min(NumStages, MaxPhis);
2750 
2751  unsigned NewReg = 0;
2752  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2753  // In the epilog, we may need to look back one stage to get the correct
2754  // Phi name because the epilog and prolog blocks execute the same stage.
2755  // The correct name is from the previous block only when the Phi has
2756  // been completely scheduled prior to the epilog, and Phi value is not
2757  // needed in multiple stages.
2758  int StageDiff = 0;
2759  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2760  NumPhis == 1)
2761  StageDiff = 1;
2762  // Adjust the computations below when the phi and the loop definition
2763  // are scheduled in different stages.
2764  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2765  StageDiff = StageScheduled - LoopValStage;
2766  for (unsigned np = 0; np < NumPhis; ++np) {
2767  // If the Phi hasn't been scheduled, then use the initial Phi operand
2768  // value. Otherwise, use the scheduled version of the instruction. This
2769  // is a little complicated when a Phi references another Phi.
2770  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2771  PhiOp1 = InitVal;
2772  // Check if the Phi has already been scheduled in a prolog stage.
2773  else if (PrologStage >= AccessStage + StageDiff + np &&
2774  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2775  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2776  // Check if the Phi has already been scheduled, but the loop intruction
2777  // is either another Phi, or doesn't occur in the loop.
2778  else if (PrologStage >= AccessStage + StageDiff + np) {
2779  // If the Phi references another Phi, we need to examine the other
2780  // Phi to get the correct value.
2781  PhiOp1 = LoopVal;
2782  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2783  int Indirects = 1;
2784  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2785  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2786  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2787  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2788  else
2789  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2790  InstOp1 = MRI.getVRegDef(PhiOp1);
2791  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2792  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2793  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2794  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2795  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2796  break;
2797  }
2798  ++Indirects;
2799  }
2800  } else
2801  PhiOp1 = InitVal;
2802  // If this references a generated Phi in the kernel, get the Phi operand
2803  // from the incoming block.
2804  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2805  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2806  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2807 
2808  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2809  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2810  // In the epilog, a map lookup is needed to get the value from the kernel,
2811  // or previous epilog block. How is does this depends on if the
2812  // instruction is scheduled in the previous block.
2813  if (!InKernel) {
2814  int StageDiffAdj = 0;
2815  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2816  StageDiffAdj = StageScheduled - LoopValStage;
2817  // Use the loop value defined in the kernel, unless the kernel
2818  // contains the last definition of the Phi.
2819  if (np == 0 && PrevStage == LastStageNum &&
2820  (StageScheduled != 0 || LoopValStage != 0) &&
2821  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2822  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2823  // Use the value defined by the Phi. We add one because we switch
2824  // from looking at the loop value to the Phi definition.
2825  else if (np > 0 && PrevStage == LastStageNum &&
2826  VRMap[PrevStage - np + 1].count(Def))
2827  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2828  // Use the loop value defined in the kernel.
2829  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2830  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2831  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2832  // Use the value defined by the Phi, unless we're generating the first
2833  // epilog and the Phi refers to a Phi in a different stage.
2834  else if (VRMap[PrevStage - np].count(Def) &&
2835  (!LoopDefIsPhi || PrevStage != LastStageNum))
2836  PhiOp2 = VRMap[PrevStage - np][Def];
2837  }
2838 
2839  // Check if we can reuse an existing Phi. This occurs when a Phi
2840  // references another Phi, and the other Phi is scheduled in an
2841  // earlier stage. We can try to reuse an existing Phi up until the last
2842  // stage of the current Phi.
2843  if (LoopDefIsPhi) {
2844  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2845  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2846  int StageDiff = (StageScheduled - LoopValStage);
2847  LVNumStages -= StageDiff;
2848  // Make sure the loop value Phi has been processed already.
2849  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2850  NewReg = PhiOp2;
2851  unsigned ReuseStage = CurStageNum;
2852  if (Schedule.isLoopCarried(this, *PhiInst))
2853  ReuseStage -= LVNumStages;
2854  // Check if the Phi to reuse has been generated yet. If not, then
2855  // there is nothing to reuse.
2856  if (VRMap[ReuseStage - np].count(LoopVal)) {
2857  NewReg = VRMap[ReuseStage - np][LoopVal];
2858 
2859  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2860  &*BBI, Def, NewReg);
2861  // Update the map with the new Phi name.
2862  VRMap[CurStageNum - np][Def] = NewReg;
2863  PhiOp2 = NewReg;
2864  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2865  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2866 
2867  if (IsLast && np == NumPhis - 1)
2868  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2869  continue;
2870  }
2871  }
2872  }
2873  if (InKernel && StageDiff > 0 &&
2874  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2875  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2876  }
2877 
2878  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2879  NewReg = MRI.createVirtualRegister(RC);
2880 
2881  MachineInstrBuilder NewPhi =
2882  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2883  TII->get(TargetOpcode::PHI), NewReg);
2884  NewPhi.addReg(PhiOp1).addMBB(BB1);
2885  NewPhi.addReg(PhiOp2).addMBB(BB2);
2886  if (np == 0)
2887  InstrMap[NewPhi] = &*BBI;
2888 
2889  // We define the Phis after creating the new pipelined code, so
2890  // we need to rename the Phi values in scheduled instructions.
2891 
2892  unsigned PrevReg = 0;
2893  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2894  PrevReg = VRMap[PrevStage - np][LoopVal];
2895  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2896  Def, NewReg, PrevReg);
2897  // If the Phi has been scheduled, use the new name for rewriting.
2898  if (VRMap[CurStageNum - np].count(Def)) {
2899  unsigned R = VRMap[CurStageNum - np][Def];
2900  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2901  R, NewReg);
2902  }
2903 
2904  // Check if we need to rename any uses that occurs after the loop. The
2905  // register to replace depends on whether the Phi is scheduled in the
2906  // epilog.
2907  if (IsLast && np == NumPhis - 1)
2908  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2909 
2910  // In the kernel, a dependent Phi uses the value from this Phi.
2911  if (InKernel)
2912  PhiOp2 = NewReg;
2913 
2914  // Update the map with the new Phi name.
2915  VRMap[CurStageNum - np][Def] = NewReg;
2916  }
2917 
2918  while (NumPhis++ < NumStages) {
2919  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2920  &*BBI, Def, NewReg, 0);
2921  }
2922 
2923  // Check if we need to rename a Phi that has been eliminated due to
2924  // scheduling.
2925  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2926  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2927  }
2928 }
2929 
2930 /// Generate Phis for the specified block in the generated pipelined code.
2931 /// These are new Phis needed because the definition is scheduled after the
2932 /// use in the pipelined sequence.
2933 void SwingSchedulerDAG::generatePhis(
2935  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2936  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2937  bool IsLast) {
2938  // Compute the stage number that contains the initial Phi value, and
2939  // the Phi from the previous stage.
2940  unsigned PrologStage = 0;
2941  unsigned PrevStage = 0;
2942  unsigned StageDiff = CurStageNum - LastStageNum;
2943  bool InKernel = (StageDiff == 0);
2944  if (InKernel) {
2945  PrologStage = LastStageNum - 1;
2946  PrevStage = CurStageNum;
2947  } else {
2948  PrologStage = LastStageNum - StageDiff;
2949  PrevStage = LastStageNum + StageDiff - 1;
2950  }
2951 
2952  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2953  BBE = BB->instr_end();
2954  BBI != BBE; ++BBI) {
2955  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2956  MachineOperand &MO = BBI->getOperand(i);
2957  if (!MO.isReg() || !MO.isDef() ||
2959  continue;
2960 
2961  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2962  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2963  unsigned Def = MO.getReg();
2964  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2965  // An instruction scheduled in stage 0 and is used after the loop
2966  // requires a phi in the epilog for the last definition from either
2967  // the kernel or prolog.
2968  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2969  hasUseAfterLoop(Def, BB, MRI))
2970  NumPhis = 1;
2971  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2972  continue;
2973 
2974  unsigned PhiOp2 = VRMap[PrevStage][Def];
2975  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2976  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2977  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2978  // The number of Phis can't exceed the number of prolog stages. The
2979  // prolog stage number is zero based.
2980  if (NumPhis > PrologStage + 1 - StageScheduled)
2981  NumPhis = PrologStage + 1 - StageScheduled;
2982  for (unsigned np = 0; np < NumPhis; ++np) {
2983  unsigned PhiOp1 = VRMap[PrologStage][Def];
2984  if (np <= PrologStage)
2985  PhiOp1 = VRMap[PrologStage - np][Def];
2986  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2987  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2988  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2989  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2990  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2991  }
2992  if (!InKernel)
2993  PhiOp2 = VRMap[PrevStage - np][Def];
2994 
2995  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2996  unsigned NewReg = MRI.createVirtualRegister(RC);
2997 
2998  MachineInstrBuilder NewPhi =
2999  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
3000  TII->get(TargetOpcode::PHI), NewReg);
3001  NewPhi.addReg(PhiOp1).addMBB(BB1);
3002  NewPhi.addReg(PhiOp2).addMBB(BB2);
3003  if (np == 0)
3004  InstrMap[NewPhi] = &*BBI;
3005 
3006  // Rewrite uses and update the map. The actions depend upon whether
3007  // we generating code for the kernel or epilog blocks.
3008  if (InKernel) {
3009  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3010  &*BBI, PhiOp1, NewReg);
3011  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3012  &*BBI, PhiOp2, NewReg);
3013 
3014  PhiOp2 = NewReg;
3015  VRMap[PrevStage - np - 1][Def] = NewReg;
3016  } else {
3017  VRMap[CurStageNum - np][Def] = NewReg;
3018  if (np == NumPhis - 1)
3019  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3020  &*BBI, Def, NewReg);
3021  }
3022  if (IsLast && np == NumPhis - 1)
3023  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
3024  }
3025  }
3026  }
3027 }
3028 
3029 /// Remove instructions that generate values with no uses.
3030 /// Typically, these are induction variable operations that generate values
3031 /// used in the loop itself. A dead instruction has a definition with
3032 /// no uses, or uses that occur in the original loop only.
3033 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
3034  MBBVectorTy &EpilogBBs) {
3035  // For each epilog block, check that the value defined by each instruction
3036  // is used. If not, delete it.
3037  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
3038  MBE = EpilogBBs.rend();
3039  MBB != MBE; ++MBB)
3040  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
3041  ME = (*MBB)->instr_rend();
3042  MI != ME;) {
3043  // From DeadMachineInstructionElem. Don't delete inline assembly.
3044  if (MI->isInlineAsm()) {
3045  ++MI;
3046  continue;
3047  }
3048  bool SawStore = false;
3049  // Check if it's safe to remove the instruction due to side effects.
3050  // We can, and want to, remove Phis here.
3051  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
3052  ++MI;
3053  continue;
3054  }
3055  bool used = true;
3056  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
3057  MOE = MI->operands_end();
3058  MOI != MOE; ++MOI) {
3059  if (!MOI->isReg() || !MOI->isDef())
3060  continue;
3061  unsigned reg = MOI->getReg();
3062  // Assume physical registers are used, unless they are marked dead.
3064  used = !MOI->isDead();
3065  if (used)
3066  break;
3067  continue;
3068  }
3069  unsigned realUses = 0;
3070  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
3071  EI = MRI.use_end();
3072  UI != EI; ++UI) {
3073  // Check if there are any uses that occur only in the original
3074  // loop. If so, that's not a real use.
3075  if (UI->getParent()->getParent() != BB) {
3076  realUses++;
3077  used = true;
3078  break;
3079  }
3080  }
3081  if (realUses > 0)
3082  break;
3083  used = false;
3084  }
3085  if (!used) {
3087  MI++->eraseFromParent();
3088  continue;
3089  }
3090  ++MI;
3091  }
3092  // In the kernel block, check if we can remove a Phi that generates a value
3093  // used in an instruction removed in the epilog block.
3094  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
3095  BBE = KernelBB->getFirstNonPHI();
3096  BBI != BBE;) {
3097  MachineInstr *MI = &*BBI;
3098  ++BBI;
3099  unsigned reg = MI->getOperand(0).getReg();
3100  if (MRI.use_begin(reg) == MRI.use_end()) {
3101  LIS.RemoveMachineInstrFromMaps(*MI);
3102  MI->eraseFromParent();
3103  }
3104  }
3105 }
3106 
3107 /// For loop carried definitions, we split the lifetime of a virtual register
3108 /// that has uses past the definition in the next iteration. A copy with a new
3109 /// virtual register is inserted before the definition, which helps with
3110 /// generating a better register assignment.
3111 ///
3112 /// v1 = phi(a, v2) v1 = phi(a, v2)
3113 /// v2 = phi(b, v3) v2 = phi(b, v3)
3114 /// v3 = .. v4 = copy v1
3115 /// .. = V1 v3 = ..
3116 /// .. = v4
3117 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
3118  MBBVectorTy &EpilogBBs,
3119  SMSchedule &Schedule) {
3121  for (auto &PHI : KernelBB->phis()) {
3122  unsigned Def = PHI.getOperand(0).getReg();
3123  // Check for any Phi definition that used as an operand of another Phi
3124  // in the same block.
3126  E = MRI.use_instr_end();
3127  I != E; ++I) {
3128  if (I->isPHI() && I->getParent() == KernelBB) {
3129  // Get the loop carried definition.
3130  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
3131  if (!LCDef)
3132  continue;
3133  MachineInstr *MI = MRI.getVRegDef(LCDef);
3134  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
3135  continue;
3136  // Search through the rest of the block looking for uses of the Phi
3137  // definition. If one occurs, then split the lifetime.
3138  unsigned SplitReg = 0;
3139  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
3140  KernelBB->instr_end()))
3141  if (BBJ.readsRegister(Def)) {
3142  // We split the lifetime when we find the first use.
3143  if (SplitReg == 0) {
3144  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
3145  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
3146  TII->get(TargetOpcode::COPY), SplitReg)
3147  .addReg(Def);
3148  }
3149  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
3150  }
3151  if (!SplitReg)
3152  continue;
3153  // Search through each of the epilog blocks for any uses to be renamed.
3154  for (auto &Epilog : EpilogBBs)
3155  for (auto &I : *Epilog)
3156  if (I.readsRegister(Def))
3157  I.substituteRegister(Def, SplitReg, 0, *TRI);
3158  break;
3159  }
3160  }
3161  }
3162 }
3163 
3164 /// Remove the incoming block from the Phis in a basic block.
3165 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
3166  for (MachineInstr &MI : *BB) {
3167  if (!MI.isPHI())
3168  break;
3169  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
3170  if (MI.getOperand(i + 1).getMBB() == Incoming) {
3171  MI.RemoveOperand(i + 1);
3172  MI.RemoveOperand(i);
3173  break;
3174  }
3175  }
3176 }
3177 
3178 /// Create branches from each prolog basic block to the appropriate epilog
3179 /// block. These edges are needed if the loop ends before reaching the
3180 /// kernel.
3181 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
3182  MachineBasicBlock *KernelBB,
3183  MBBVectorTy &EpilogBBs,
3184  SMSchedule &Schedule, ValueMapTy *VRMap) {
3185  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
3186  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
3187  MachineInstr *Cmp = Pass.LI.LoopCompare;
3188  MachineBasicBlock *LastPro = KernelBB;
3189  MachineBasicBlock *LastEpi = KernelBB;
3190 
3191  // Start from the blocks connected to the kernel and work "out"
3192  // to the first prolog and the last epilog blocks.
3194  unsigned MaxIter = PrologBBs.size() - 1;
3195  unsigned LC = UINT_MAX;
3196  unsigned LCMin = UINT_MAX;
3197  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3198  // Add branches to the prolog that go to the corresponding
3199  // epilog, and the fall-thru prolog/kernel block.
3200  MachineBasicBlock *Prolog = PrologBBs[j];
3201  MachineBasicBlock *Epilog = EpilogBBs[i];
3202  // We've executed one iteration, so decrement the loop count and check for
3203  // the loop end.
3205  // Check if the LOOP0 has already been removed. If so, then there is no need
3206  // to reduce the trip count.
3207  if (LC != 0)
3208  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3209  MaxIter);
3210 
3211  // Record the value of the first trip count, which is used to determine if
3212  // branches and blocks can be removed for constant trip counts.
3213  if (LCMin == UINT_MAX)
3214  LCMin = LC;
3215 
3216  unsigned numAdded = 0;
3218  Prolog->addSuccessor(Epilog);
3219  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3220  } else if (j >= LCMin) {
3221  Prolog->addSuccessor(Epilog);
3222  Prolog->removeSuccessor(LastPro);
3223  LastEpi->removeSuccessor(Epilog);
3224  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3225  removePhis(Epilog, LastEpi);
3226  // Remove the blocks that are no longer referenced.
3227  if (LastPro != LastEpi) {
3228  LastEpi->clear();
3229  LastEpi->eraseFromParent();
3230  }
3231  LastPro->clear();
3232  LastPro->eraseFromParent();
3233  } else {
3234  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3235  removePhis(Epilog, Prolog);
3236  }
3237  LastPro = Prolog;
3238  LastEpi = Epilog;
3240  E = Prolog->instr_rend();
3241  I != E && numAdded > 0; ++I, --numAdded)
3242  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3243  }
3244 }
3245 
3246 /// Return true if we can compute the amount the instruction changes
3247 /// during each iteration. Set Delta to the amount of the change.
3248 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3250  MachineOperand *BaseOp;
3251  int64_t Offset;
3252  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
3253  return false;
3254 
3255  if (!BaseOp->isReg())
3256  return false;
3257 
3258  unsigned BaseReg = BaseOp->getReg();
3259 
3260  MachineRegisterInfo &MRI = MF.getRegInfo();
3261  // Check if there is a Phi. If so, get the definition in the loop.
3262  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3263  if (BaseDef && BaseDef->isPHI()) {
3264  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3265  BaseDef = MRI.getVRegDef(BaseReg);
3266  }
3267  if (!BaseDef)
3268  return false;
3269 
3270  int D = 0;
3271  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3272  return false;
3273 
3274  Delta = D;
3275  return true;
3276 }
3277 
3278 /// Update the memory operand with a new offset when the pipeliner
3279 /// generates a new copy of the instruction that refers to a
3280 /// different memory location.
3281 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3282  MachineInstr &OldMI, unsigned Num) {
3283  if (Num == 0)
3284  return;
3285  // If the instruction has memory operands, then adjust the offset
3286  // when the instruction appears in different stages.
3287  if (NewMI.memoperands_empty())
3288  return;
3290  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3291  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3292  (!MMO->getValue())) {
3293  NewMMOs.push_back(MMO);
3294  continue;
3295  }
3296  unsigned Delta;
3297  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
3298  int64_t AdjOffset = Delta * Num;
3299  NewMMOs.push_back(
3300  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
3301  } else {
3302  NewMMOs.push_back(
3304  }
3305  }
3306  NewMI.setMemRefs(MF, NewMMOs);
3307 }
3308 
3309 /// Clone the instruction for the new pipelined loop and update the
3310 /// memory operands, if needed.
3311 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3312  unsigned CurStageNum,
3313  unsigned InstStageNum) {
3314  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3315  // Check for tied operands in inline asm instructions. This should be handled
3316  // elsewhere, but I'm not sure of the best solution.
3317  if (OldMI->isInlineAsm())
3318  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3319  const auto &MO = OldMI->getOperand(i);
3320  if (MO.isReg() && MO.isUse())
3321  break;
3322  unsigned UseIdx;
3323  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3324  NewMI->tieOperands(i, UseIdx);
3325  }
3326  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3327  return NewMI;
3328 }
3329 
3330 /// Clone the instruction for the new pipelined loop. If needed, this
3331 /// function updates the instruction using the values saved in the
3332 /// InstrChanges structure.
3333 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3334  unsigned CurStageNum,
3335  unsigned InstStageNum,
3336  SMSchedule &Schedule) {
3337  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3339  InstrChanges.find(getSUnit(OldMI));
3340  if (It != InstrChanges.end()) {
3341  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3342  unsigned BasePos, OffsetPos;
3343  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3344  return nullptr;
3345  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3346  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
3347  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3348  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3349  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3350  }
3351  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3352  return NewMI;
3353 }
3354 
3355 /// Update the machine instruction with new virtual registers. This
3356 /// function may change the defintions and/or uses.
3357 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3358  unsigned CurStageNum,
3359  unsigned InstrStageNum,
3360  SMSchedule &Schedule,
3361  ValueMapTy *VRMap) {
3362  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3363  MachineOperand &MO = NewMI->getOperand(i);
3365  continue;
3366  unsigned reg = MO.getReg();
3367  if (MO.isDef()) {
3368  // Create a new virtual register for the definition.
3369  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3370  unsigned NewReg = MRI.createVirtualRegister(RC);
3371  MO.setReg(NewReg);
3372  VRMap[CurStageNum][reg] = NewReg;
3373  if (LastDef)
3374  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3375  } else if (MO.isUse()) {
3376  MachineInstr *Def = MRI.getVRegDef(reg);
3377  // Compute the stage that contains the last definition for instruction.
3378  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3379  unsigned StageNum = CurStageNum;
3380  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3381  // Compute the difference in stages between the defintion and the use.
3382  unsigned StageDiff = (InstrStageNum - DefStageNum);
3383  // Make an adjustment to get the last definition.
3384  StageNum -= StageDiff;
3385  }
3386  if (VRMap[StageNum].count(reg))
3387  MO.setReg(VRMap[StageNum][reg]);
3388  }
3389  }
3390 }
3391 
3392 /// Return the instruction in the loop that defines the register.
3393 /// If the definition is a Phi, then follow the Phi operand to
3394 /// the instruction in the loop.
3395 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3397  MachineInstr *Def = MRI.getVRegDef(Reg);
3398  while (Def->isPHI()) {
3399  if (!Visited.insert(Def).second)
3400  break;
3401  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3402  if (Def->getOperand(i + 1).getMBB() == BB) {
3403  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3404  break;
3405  }
3406  }
3407  return Def;
3408 }
3409 
3410 /// Return the new name for the value from the previous stage.
3411 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3412  unsigned LoopVal, unsigned LoopStage,
3413  ValueMapTy *VRMap,
3414  MachineBasicBlock *BB) {
3415  unsigned PrevVal = 0;
3416  if (StageNum > PhiStage) {
3417  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3418  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3419  // The name is defined in the previous stage.
3420  PrevVal = VRMap[StageNum - 1][LoopVal];
3421  else if (VRMap[StageNum].count(LoopVal))
3422  // The previous name is defined in the current stage when the instruction
3423  // order is swapped.
3424  PrevVal = VRMap[StageNum][LoopVal];
3425  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3426  // The loop value hasn't yet been scheduled.
3427  PrevVal = LoopVal;
3428  else if (StageNum == PhiStage + 1)
3429  // The loop value is another phi, which has not been scheduled.
3430  PrevVal = getInitPhiReg(*LoopInst, BB);
3431  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3432  // The loop value is another phi, which has been scheduled.
3433  PrevVal =
3434  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3435  LoopStage, VRMap, BB);
3436  }
3437  return PrevVal;
3438 }
3439 
3440 /// Rewrite the Phi values in the specified block to use the mappings
3441 /// from the initial operand. Once the Phi is scheduled, we switch
3442 /// to using the loop value instead of the Phi value, so those names
3443 /// do not need to be rewritten.
3444 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3445  unsigned StageNum,
3446  SMSchedule &Schedule,
3447  ValueMapTy *VRMap,
3448  InstrMapTy &InstrMap) {
3449  for (auto &PHI : BB->phis()) {
3450  unsigned InitVal = 0;
3451  unsigned LoopVal = 0;
3452  getPhiRegs(PHI, BB, InitVal, LoopVal);
3453  unsigned PhiDef = PHI.getOperand(0).getReg();
3454 
3455  unsigned PhiStage =
3456  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3457  unsigned LoopStage =
3458  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3459  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3460  if (NumPhis > StageNum)
3461  NumPhis = StageNum;
3462  for (unsigned np = 0; np <= NumPhis; ++np) {
3463  unsigned NewVal =
3464  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3465  if (!NewVal)
3466  NewVal = InitVal;
3467  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3468  PhiDef, NewVal);
3469  }
3470  }
3471 }
3472 
3473 /// Rewrite a previously scheduled instruction to use the register value
3474 /// from the new instruction. Make sure the instruction occurs in the
3475 /// basic block, and we don't change the uses in the new instruction.
3476 void SwingSchedulerDAG::rewriteScheduledInstr(
3477  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3478  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3479  unsigned NewReg, unsigned PrevReg) {
3480  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3481  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3482  // Rewrite uses that have been scheduled already to use the new
3483  // Phi register.
3484  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3485  EI = MRI.use_end();
3486  UI != EI;) {
3487  MachineOperand &UseOp = *UI;
3488  MachineInstr *UseMI = UseOp.getParent();
3489  ++UI;
3490  if (UseMI->getParent() != BB)
3491  continue;
3492  if (UseMI->isPHI()) {
3493  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3494  continue;
3495  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3496  continue;
3497  }
3498  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3499  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3500  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3501  int StageSched = Schedule.stageScheduled(OrigMISU);
3502  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3503  unsigned ReplaceReg = 0;
3504  // This is the stage for the scheduled instruction.
3505  if (StagePhi == StageSched && Phi->isPHI()) {
3506  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3507  if (PrevReg && InProlog)
3508  ReplaceReg = PrevReg;
3509  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3510  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3511  ReplaceReg = PrevReg;
3512  else
3513  ReplaceReg = NewReg;
3514  }
3515  // The scheduled instruction occurs before the scheduled Phi, and the
3516  // Phi is not loop carried.
3517  if (!InProlog && StagePhi + 1 == StageSched &&
3518  !Schedule.isLoopCarried(this, *Phi))
3519  ReplaceReg = NewReg;
3520  if (StagePhi > StageSched && Phi->isPHI())
3521  ReplaceReg = NewReg;
3522  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3523  ReplaceReg = NewReg;
3524  if (ReplaceReg) {
3525  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3526  UseOp.setReg(ReplaceReg);
3527  }
3528  }
3529 }
3530 
3531 /// Check if we can change the instruction to use an offset value from the
3532 /// previous iteration. If so, return true and set the base and offset values
3533 /// so that we can rewrite the load, if necessary.
3534 /// v1 = Phi(v0, v3)
3535 /// v2 = load v1, 0
3536 /// v3 = post_store v1, 4, x
3537 /// This function enables the load to be rewritten as v2 = load v3, 4.
3538 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3539  unsigned &BasePos,
3540  unsigned &OffsetPos,
3541  unsigned &NewBase,
3542  int64_t &Offset) {
3543  // Get the load instruction.
3544  if (TII->isPostIncrement(*MI))
3545  return false;
3546  unsigned BasePosLd, OffsetPosLd;
3547  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3548  return false;
3549  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3550 
3551  // Look for the Phi instruction.
3552  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3553  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3554  if (!Phi || !Phi->isPHI())
3555  return false;
3556  // Get the register defined in the loop block.
3557  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3558  if (!PrevReg)
3559  return false;
3560 
3561  // Check for the post-increment load/store instruction.
3562  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3563  if (!PrevDef || PrevDef == MI)
3564  return false;
3565 
3566  if (!TII->isPostIncrement(*PrevDef))
3567  return false;
3568 
3569  unsigned BasePos1 = 0, OffsetPos1 = 0;
3570  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3571  return false;
3572 
3573  // Make sure that the instructions do not access the same memory location in
3574  // the next iteration.
3575  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3576  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3577  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3578  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3579  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3580  MF.DeleteMachineInstr(NewMI);
3581  if (!Disjoint)
3582  return false;
3583 
3584  // Set the return value once we determine that we return true.
3585  BasePos = BasePosLd;
3586  OffsetPos = OffsetPosLd;
3587  NewBase = PrevReg;
3588  Offset = StoreOffset;
3589  return true;
3590 }
3591 
3592 /// Apply changes to the instruction if needed. The changes are need
3593 /// to improve the scheduling and depend up on the final schedule.
3594 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3595  SMSchedule &Schedule) {
3596  SUnit *SU = getSUnit(MI);
3598  InstrChanges.find(SU);
3599  if (It != InstrChanges.end()) {
3600  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3601  unsigned BasePos, OffsetPos;
3602  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3603  return;
3604  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3605  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3606  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3607  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3608  int BaseStageNum = Schedule.stageScheduled(SU);
3609  int BaseCycleNum = Schedule.cycleScheduled(SU);
3610  if (BaseStageNum < DefStageNum) {
3611  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3612  int OffsetDiff = DefStageNum - BaseStageNum;
3613  if (DefCycleNum < BaseCycleNum) {
3614  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3615  if (OffsetDiff > 0)
3616  --OffsetDiff;
3617  }
3618  int64_t NewOffset =
3619  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3620  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3621  SU->setInstr(NewMI);
3622  MISUnitMap[NewMI] = SU;
3623  NewMIs.insert(NewMI);
3624  }
3625  }
3626 }
3627 
3628 /// Return true for an order or output dependence that is loop carried
3629 /// potentially. A dependence is loop carried if the destination defines a valu
3630 /// that may be used or defined by the source in a subsequent iteration.
3631 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
3632  bool isSucc) {
3633  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3634  Dep.isArtificial())
3635  return false;
3636 
3637  if (!SwpPruneLoopCarried)
3638  return true;
3639 
3640  if (Dep.getKind() == SDep::Output)
3641  return true;
3642 
3643  MachineInstr *SI = Source->getInstr();
3644  MachineInstr *DI = Dep.getSUnit()->getInstr();
3645  if (!isSucc)
3646  std::swap(SI, DI);
3647  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3648 
3649  // Assume ordered loads and stores may have a loop carried dependence.
3650  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3652  return true;
3653 
3654  // Only chain dependences between a load and store can be loop carried.
3655  if (!DI->mayStore() || !SI->mayLoad())
3656  return false;
3657 
3658  unsigned DeltaS, DeltaD;
3659  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3660  return true;
3661 
3662  MachineOperand *BaseOpS, *BaseOpD;
3663  int64_t OffsetS, OffsetD;
3665  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3666  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3667  return true;
3668 
3669  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3670  return true;
3671 
3672  // Check that the base register is incremented by a constant value for each
3673  // iteration.
3674  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3675  if (!Def || !Def->isPHI())
3676  return true;
3677  unsigned InitVal = 0;
3678  unsigned LoopVal = 0;
3679  getPhiRegs(*Def, BB, InitVal, LoopVal);
3680  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3681  int D = 0;
3682  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3683  return true;
3684 
3685  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3686  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3687 
3688  // This is the main test, which checks the offset values and the loop
3689  // increment value to determine if the accesses may be loop carried.
3690  if (OffsetS >= OffsetD)
3691  return OffsetS + AccessSizeS > DeltaS;
3692  else
3693  return OffsetD + AccessSizeD > DeltaD;
3694 
3695  return true;
3696 }
3697 
3698 void SwingSchedulerDAG::postprocessDAG() {
3699  for (auto &M : Mutations)
3700  M->apply(this);
3701 }
3702 
3703 /// Try to schedule the node at the specified StartCycle and continue
3704 /// until the node is schedule or the EndCycle is reached. This function
3705 /// returns true if the node is scheduled. This routine may search either
3706 /// forward or backward for a place to insert the instruction based upon
3707 /// the relative values of StartCycle and EndCycle.
3708 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3709  bool forward = true;
3710  if (StartCycle > EndCycle)
3711  forward = false;
3712 
3713  // The terminating condition depends on the direction.
3714  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3715  for (int curCycle = StartCycle; curCycle != termCycle;
3716  forward ? ++curCycle : --curCycle) {
3717 
3718  // Add the already scheduled instructions at the specified cycle to the DFA.
3719  Resources->clearResources();
3720  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3721  checkCycle <= LastCycle; checkCycle += II) {
3722  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3723 
3724  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3725  E = cycleInstrs.end();
3726  I != E; ++I) {
3727  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3728  continue;
3729  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3730  "These instructions have already been scheduled.");
3731  Resources->reserveResources(*(*I)->getInstr());
3732  }
3733  }
3734  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3735  Resources->canReserveResources(*SU->getInstr())) {
3736  LLVM_DEBUG({
3737  dbgs() << "\tinsert at cycle " << curCycle << " ";
3738  SU->getInstr()->dump();
3739  });
3740 
3741  ScheduledInstrs[curCycle].push_back(SU);
3742  InstrToCycle.insert(std::make_pair(SU, curCycle));
3743  if (curCycle > LastCycle)
3744  LastCycle = curCycle;
3745  if (curCycle < FirstCycle)
3746  FirstCycle = curCycle;
3747  return true;
3748  }
3749  LLVM_DEBUG({
3750  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3751  SU->getInstr()->dump();
3752  });
3753  }
3754  return false;
3755 }
3756 
3757 // Return the cycle of the earliest scheduled instruction in the chain.
3758 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3759  SmallPtrSet<SUnit *, 8> Visited;
3760  SmallVector<SDep, 8> Worklist;
3761  Worklist.push_back(Dep);
3762  int EarlyCycle = INT_MAX;
3763  while (!Worklist.empty()) {
3764  const SDep &Cur = Worklist.pop_back_val();
3765  SUnit *PrevSU = Cur.getSUnit();
3766  if (Visited.count(PrevSU))
3767  continue;
3768  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3769  if (it == InstrToCycle.end())
3770  continue;
3771  EarlyCycle = std::min(EarlyCycle, it->second);
3772  for (const auto &PI : PrevSU->Preds)
3773  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3774  Worklist.push_back(PI);
3775  Visited.insert(PrevSU);
3776  }
3777  return EarlyCycle;
3778 }
3779 
3780 // Return the cycle of the latest scheduled instruction in the chain.
3781 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3782  SmallPtrSet<SUnit *, 8> Visited;
3783  SmallVector<SDep, 8> Worklist;
3784  Worklist.push_back(Dep);
3785  int LateCycle = INT_MIN;
3786  while (!Worklist.empty()) {
3787  const SDep &Cur = Worklist.pop_back_val();
3788  SUnit *SuccSU = Cur.getSUnit();
3789  if (Visited.count(SuccSU))
3790  continue;
3791  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3792  if (it == InstrToCycle.end())
3793  continue;
3794  LateCycle = std::max(LateCycle, it->second);
3795  for (const auto &SI : SuccSU->Succs)
3796  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3797  Worklist.push_back(SI);
3798  Visited.insert(SuccSU);
3799  }
3800  return LateCycle;
3801 }
3802 
3803 /// If an instruction has a use that spans multiple iterations, then
3804 /// return true. These instructions are characterized by having a back-ege
3805 /// to a Phi, which contains a reference to another Phi.
3806 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3807  for (auto &P : SU->Preds)
3808  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3809  for (auto &S : P.getSUnit()->Succs)
3810  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3811  return P.getSUnit();
3812  return nullptr;
3813 }
3814 
3815 /// Compute the scheduling start slot for the instruction. The start slot
3816 /// depends on any predecessor or successor nodes scheduled already.
3817 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3818  int *MinEnd, int *MaxStart, int II,
3819  SwingSchedulerDAG *DAG) {
3820  // Iterate over each instruction that has been scheduled already. The start
3821  // slot computation depends on whether the previously scheduled instruction
3822  // is a predecessor or successor of the specified instruction.
3823  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3824 
3825  // Iterate over each instruction in the current cycle.
3826  for (SUnit *I : getInstructions(cycle)) {
3827  // Because we're processing a DAG for the dependences, we recognize
3828  // the back-edge in recurrences by anti dependences.
3829  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3830  const SDep &Dep = SU->Preds[i];
3831  if (Dep.getSUnit() == I) {
3832  if (!DAG->isBackedge(SU, Dep)) {
3833  int EarlyStart = cycle + Dep.getLatency() -
3834  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3835  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3836  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3837  int End = earliestCycleInChain(Dep) + (II - 1);
3838  *MinEnd = std::min(*MinEnd, End);
3839  }
3840  } else {
3841  int LateStart = cycle - Dep.getLatency() +
3842  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3843  *MinLateStart = std::min(*MinLateStart, LateStart);
3844  }
3845  }
3846  // For instruction that requires multiple iterations, make sure that
3847  // the dependent instruction is not scheduled past the definition.
3848  SUnit *BE = multipleIterations(I, DAG);
3849  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3850  !SU->isPred(I))
3851  *MinLateStart = std::min(*MinLateStart, cycle);
3852  }
3853  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3854  if (SU->Succs[i].getSUnit() == I) {
3855  const SDep &Dep = SU->Succs[i];
3856  if (!DAG->isBackedge(SU, Dep)) {
3857  int LateStart = cycle - Dep.getLatency() +
3858  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3859  *MinLateStart = std::min(*MinLateStart, LateStart);
3860  if (DAG->isLoopCarriedDep(SU, Dep)) {
3861  int Start = latestCycleInChain(Dep) + 1 - II;
3862  *MaxStart = std::max(*MaxStart, Start);
3863  }
3864  } else {
3865  int EarlyStart = cycle + Dep.getLatency() -
3866  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3867  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3868  }
3869  }
3870  }
3871  }
3872  }
3873 }
3874 
3875 /// Order the instructions within a cycle so that the definitions occur
3876 /// before the uses. Returns true if the instruction is added to the start
3877 /// of the list, or false if added to the end.
3878 void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3879  std::deque<SUnit *> &Insts) {
3880  MachineInstr *MI = SU->getInstr();
3881  bool OrderBeforeUse = false;
3882  bool OrderAfterDef = false;
3883  bool OrderBeforeDef = false;
3884  unsigned MoveDef = 0;
3885  unsigned MoveUse = 0;
3886  int StageInst1 = stageScheduled(SU);
3887 
3888  unsigned Pos = 0;
3889  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3890  ++I, ++Pos) {
3891  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3892  MachineOperand &MO = MI->getOperand(i);
3894  continue;
3895 
3896  unsigned Reg = MO.getReg();
3897  unsigned BasePos, OffsetPos;
3898  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3899  if (MI->getOperand(BasePos).getReg() == Reg)
3900  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3901  Reg = NewReg;
3902  bool Reads, Writes;
3903  std::tie(Reads, Writes) =
3904  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3905  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3906  OrderBeforeUse = true;
3907  if (MoveUse == 0)
3908  MoveUse = Pos;
3909  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3910  // Add the instruction after the scheduled instruction.
3911  OrderAfterDef = true;
3912  MoveDef = Pos;
3913  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3914  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3915  OrderBeforeUse = true;
3916  if (MoveUse == 0)
3917  MoveUse = Pos;
3918  } else {
3919  OrderAfterDef = true;
3920  MoveDef = Pos;
3921  }
3922  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3923  OrderBeforeUse = true;
3924  if (MoveUse == 0)
3925  MoveUse = Pos;
3926  if (MoveUse != 0) {
3927  OrderAfterDef = true;
3928  MoveDef = Pos - 1;
3929  }
3930  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3931  // Add the instruction before the scheduled instruction.
3932  OrderBeforeUse = true;
3933  if (MoveUse == 0)
3934  MoveUse = Pos;
3935  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3936  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3937  if (MoveUse == 0) {
3938  OrderBeforeDef = true;
3939  MoveUse = Pos;
3940  }
3941  }
3942  }
3943  // Check for order dependences between instructions. Make sure the source
3944  // is ordered before the destination.
3945  for (auto &S : SU->Succs) {
3946  if (S.getSUnit() != *I)
3947  continue;
3948  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3949  OrderBeforeUse = true;
3950  if (Pos < MoveUse)
3951  MoveUse = Pos;
3952  }
3953  }
3954  for (auto &P : SU->Preds) {
3955  if (P.getSUnit() != *I)
3956  continue;
3957  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3958  OrderAfterDef = true;
3959  MoveDef = Pos;
3960  }
3961  }
3962  }
3963 
3964  // A circular dependence.
3965  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3966  OrderBeforeUse = false;
3967 
3968  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3969  // to a loop-carried dependence.
3970  if (OrderBeforeDef)
3971  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3972 
3973  // The uncommon case when the instruction order needs to be updated because
3974  // there is both a use and def.
3975  if (OrderBeforeUse && OrderAfterDef) {
3976  SUnit *UseSU = Insts.at(MoveUse);
3977  SUnit *DefSU = Insts.at(MoveDef);
3978  if (MoveUse > MoveDef) {
3979  Insts.erase(Insts.begin() + MoveUse);
3980  Insts.erase(Insts.begin() + MoveDef);
3981  } else {
3982  Insts.erase(Insts.begin() + MoveDef);
3983  Insts.erase(Insts.begin() + MoveUse);
3984  }
3985  orderDependence(SSD, UseSU, Insts);
3986  orderDependence(SSD, SU, Insts);
3987  orderDependence(SSD, DefSU, Insts);
3988  return;
3989  }
3990  // Put the new instruction first if there is a use in the list. Otherwise,
3991  // put it at the end of the list.
3992  if (OrderBeforeUse)
3993  Insts.push_front(SU);
3994  else
3995  Insts.push_back(SU);
3996 }
3997 
3998 /// Return true if the scheduled Phi has a loop carried operand.
3999 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
4000  if (!Phi.isPHI())
4001  return false;
4002  assert(Phi.isPHI() && "Expecting a Phi.");
4003  SUnit *DefSU = SSD->getSUnit(&Phi);
4004  unsigned DefCycle = cycleScheduled(DefSU);
4005  int DefStage = stageScheduled(DefSU);
4006 
4007  unsigned InitVal = 0;
4008  unsigned LoopVal = 0;
4009  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
4010  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
4011  if (!UseSU)
4012  return true;
4013  if (UseSU->getInstr()->isPHI())
4014  return true;
4015  unsigned LoopCycle = cycleScheduled(UseSU);
4016  int LoopStage = stageScheduled(UseSU);
4017  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
4018 }
4019 
4020 /// Return true if the instruction is a definition that is loop carried
4021 /// and defines the use on the next iteration.
4022 /// v1 = phi(v2, v3)
4023 /// (Def) v3 = op v1
4024 /// (MO) = v1
4025 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
4026 /// register.
4027 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
4029  if (!MO.isReg())
4030  return false;
4031  if (Def->isPHI())
4032  return false;
4033  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
4034  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
4035  return false;
4036  if (!isLoopCarried(SSD, *Phi))
4037  return false;
4038  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
4039  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
4040  MachineOperand &DMO = Def->getOperand(i);
4041  if (!DMO.isReg() || !DMO.isDef())
4042  continue;
4043  if (DMO.getReg() == LoopReg)
4044  return true;
4045  }
4046  return false;
4047 }
4048 
4049 // Check if the generated schedule is valid. This function checks if
4050 // an instruction that uses a physical register is scheduled in a
4051 // different stage than the definition. The pipeliner does not handle
4052 // physical register values that may cross a basic block boundary.
4053 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
4054  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
4055  SUnit &SU = SSD->SUnits[i];
4056  if (!SU.hasPhysRegDefs)
4057  continue;
4058  int StageDef = stageScheduled(&SU);
4059  assert(StageDef != -1 && "Instruction should have been scheduled.");
4060  for (auto &SI : SU.Succs)
4061  if (SI.isAssignedRegDep())
4062  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
4063  if (stageScheduled(SI.getSUnit()) != StageDef)
4064  return false;
4065  }
4066  return true;
4067 }
4068 
4069 /// A property of the node order in swing-modulo-scheduling is
4070 /// that for nodes outside circuits the following holds:
4071 /// none of them is scheduled after both a successor and a
4072 /// predecessor.
4073 /// The method below checks whether the property is met.
4074 /// If not, debug information is printed and statistics information updated.
4075 /// Note that we do not use an assert statement.
4076 /// The reason is that although an invalid node oder may prevent
4077 /// the pipeliner from finding a pipelined schedule for arbitrary II,
4078 /// it does not lead to the generation of incorrect code.
4079 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
4080 
4081  // a sorted vector that maps each SUnit to its index in the NodeOrder
4082  typedef std::pair<SUnit *, unsigned> UnitIndex;
4083  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
4084 
4085  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
4086  Indices.push_back(std::make_pair(NodeOrder[i], i));
4087 
4088  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
4089  return std::get<0>(i1) < std::get<0>(i2);
4090  };
4091 
4092  // sort, so that we can perform a binary search
4093  llvm::sort(Indices, CompareKey);
4094 
4095  bool Valid = true;
4096  (void)Valid;
4097  // for each SUnit in the NodeOrder, check whether
4098  // it appears after both a successor and a predecessor
4099  // of the SUnit. If this is the case, and the SUnit
4100  // is not part of circuit, then the NodeOrder is not
4101  // valid.
4102  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
4103  SUnit *SU = NodeOrder[i];
4104  unsigned Index = i;
4105 
4106  bool PredBefore = false;
4107  bool SuccBefore = false;
4108 
4109  SUnit *Succ;
4110  SUnit *Pred;
4111  (void)Succ;
4112  (void)Pred;
4113 
4114  for (SDep &PredEdge : SU->Preds) {
4115  SUnit *PredSU = PredEdge.getSUnit();
4116  unsigned PredIndex =
4117  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4118  std::make_pair(PredSU, 0), CompareKey));
4119  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
4120  PredBefore = true;
4121  Pred = PredSU;
4122  break;
4123  }
4124  }
4125 
4126  for (SDep &SuccEdge : SU->Succs) {
4127  SUnit *SuccSU = SuccEdge.getSUnit();
4128  unsigned SuccIndex =
4129  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4130  std::make_pair(SuccSU, 0), CompareKey));
4131  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
4132  SuccBefore = true;
4133  Succ = SuccSU;
4134  break;
4135  }
4136  }
4137 
4138  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
4139  // instructions in circuits are allowed to be scheduled
4140  // after both a successor and predecessor.
4141  bool InCircuit = std::any_of(
4142  Circuits.begin(), Circuits.end(),
4143  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
4144  if (InCircuit)
4145  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
4146  else {
4147  Valid = false;
4148  NumNodeOrderIssues++;
4149  LLVM_DEBUG(dbgs() << "Predecessor ";);
4150  }
4151  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
4152  << " are scheduled before node " << SU->NodeNum
4153  << "\n";);
4154  }
4155  }
4156 
4157  LLVM_DEBUG({
4158  if (!Valid)
4159  dbgs() << "Invalid node order found!\n";
4160  });
4161 }
4162 
4163 /// Attempt to fix the degenerate cases when the instruction serialization
4164 /// causes the register lifetimes to overlap. For example,
4165 /// p' = store_pi(p, b)
4166 /// = load p, offset
4167 /// In this case p and p' overlap, which means that two registers are needed.
4168 /// Instead, this function changes the load to use p' and updates the offset.
4169 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
4170  unsigned OverlapReg = 0;
4171  unsigned NewBaseReg = 0;
4172  for (SUnit *SU : Instrs) {
4173  MachineInstr *MI = SU->getInstr();
4174  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4175  const MachineOperand &MO = MI->getOperand(i);
4176  // Look for an instruction that uses p. The instruction occurs in the
4177  // same cycle but occurs later in the serialized order.
4178  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
4179  // Check that the instruction appears in the InstrChanges structure,
4180  // which contains instructions that can have the offset updated.
4182  InstrChanges.find(SU);
4183  if (It != InstrChanges.end()) {
4184  unsigned BasePos, OffsetPos;
4185  // Update the base register and adjust the offset.
4186  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
4187  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
4188  NewMI->getOperand(BasePos).setReg(NewBaseReg);
4189  int64_t NewOffset =
4190  MI->getOperand(OffsetPos).getImm() - It->second.second;
4191  NewMI->getOperand(OffsetPos).setImm(NewOffset);
4192  SU->setInstr(NewMI);
4193  MISUnitMap[NewMI] = SU;
4194  NewMIs.insert(NewMI);
4195  }
4196  }
4197  OverlapReg = 0;
4198  NewBaseReg = 0;
4199  break;
4200  }
4201  // Look for an instruction of the form p' = op(p), which uses and defines
4202  // two virtual registers that get allocated to the same physical register.
4203  unsigned TiedUseIdx = 0;
4204  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
4205  // OverlapReg is p in the example above.
4206  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
4207  // NewBaseReg is p' in the example above.
4208  NewBaseReg = MI->getOperand(i).getReg();
4209  break;
4210  }
4211  }
4212  }
4213 }
4214 
4215 /// After the schedule has been formed, call this function to combine
4216 /// the instructions from the different stages/cycles. That is, this
4217 /// function creates a schedule that represents a single iteration.
4218 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
4219  // Move all instructions to the first stage from later stages.
4220  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4221  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
4222  ++stage) {
4223  std::deque<SUnit *> &cycleInstrs =
4224  ScheduledInstrs[cycle + (stage * InitiationInterval)];
4225  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
4226  E = cycleInstrs.rend();
4227  I != E; ++I)
4228  ScheduledInstrs[cycle].push_front(*I);
4229  }
4230  }
4231  // Iterate over the definitions in each instruction, and compute the
4232  // stage difference for each use. Keep the maximum value.
4233  for (auto &I : InstrToCycle) {
4234  int DefStage = stageScheduled(I.first);
4235  MachineInstr *MI = I.first->getInstr();
4236  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4237  MachineOperand &Op = MI->getOperand(i);
4238  if (!Op.isReg() || !Op.isDef())
4239  continue;
4240 
4241  unsigned Reg = Op.getReg();
4242  unsigned MaxDiff = 0;
4243  bool PhiIsSwapped = false;
4244  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
4245  EI = MRI.use_end();
4246  UI != EI; ++UI) {
4247  MachineOperand &UseOp = *UI;
4248  MachineInstr *UseMI = UseOp.getParent();
4249  SUnit *SUnitUse = SSD->getSUnit(UseMI);
4250  int UseStage = stageScheduled(SUnitUse);
4251  unsigned Diff = 0;
4252  if (UseStage != -1 && UseStage >= DefStage)
4253  Diff = UseStage - DefStage;
4254  if (MI->isPHI()) {
4255  if (isLoopCarried(SSD, *MI))
4256  ++Diff;
4257  else
4258  PhiIsSwapped = true;
4259  }
4260  MaxDiff = std::max(Diff, MaxDiff);
4261  }
4262  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
4263  }
4264  }
4265 
4266  // Erase all the elements in the later stages. Only one iteration should
4267  // remain in the scheduled list, and it contains all the instructions.
4268  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
4269  ScheduledInstrs.erase(cycle);
4270 
4271  // Change the registers in instruction as specified in the InstrChanges
4272  // map. We need to use the new registers to create the correct order.
4273  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
4274  SUnit *SU = &SSD->SUnits[i];
4275  SSD->applyInstrChange(SU->getInstr(), *this);
4276  }
4277 
4278  // Reorder the instructions in each cycle to fix and improve the
4279  // generated code.
4280  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
4281  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
4282  std::deque<SUnit *> newOrderPhi;
4283  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4284  SUnit *SU = cycleInstrs[i];
4285  if (SU->getInstr()->isPHI())
4286  newOrderPhi.push_back(SU);
4287  }
4288  std::deque<SUnit *> newOrderI;
4289  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4290  SUnit *SU = cycleInstrs[i];
4291  if (!SU->getInstr()->isPHI())
4292  orderDependence(SSD, SU, newOrderI);
4293  }
4294  // Replace the old order with the new order.
4295  cycleInstrs.swap(newOrderPhi);
4296  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
4297  SSD->fixupRegisterOverlaps(cycleInstrs);
4298  }
4299 
4300  LLVM_DEBUG(dump(););
4301 }
4302 
4303 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4304 /// Print the schedule information to the given output.
4305 void SMSchedule::print(raw_ostream &os) const {
4306  // Iterate over each cycle.
4307  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4308  // Iterate over each instruction in the cycle.
4309  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
4310  for (SUnit *CI : cycleInstrs->second) {
4311  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
4312  os << "(" << CI->NodeNum << ") ";
4313  CI->getInstr()->print(os);
4314  os << "\n";
4315  }
4316  }
4317 }
4318 
4319 /// Utility function used for debugging to print the schedule.
4320 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
4321 #endif
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:754
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
BitVector & set()
Definition: BitVector.h:398
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:454
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:89
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:633
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:270
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:402
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isInlineAsm() const
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:85
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
Mutate the DAG as a postpass after normal DAG building.
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
block Block Frequency true
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isPHI() const
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:264
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:260
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
virtual bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:263
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:370
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:265
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1182
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:284
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:222
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:100
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:423
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
SlotIndexes pass.
Definition: SlotIndexes.h:331
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1276
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:484
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:148
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:348
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1246
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
TargetInstrInfo - Interface to description of machine instruction set.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
const Value * getValue() const
Return the base address of the memory access.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:583
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:577
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:419
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:377
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:516
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:549
BitVector & reset()
Definition: BitVector.h:439
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1187
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:181
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1220
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:50
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void push_back(bool Val)
Definition: BitVector.h:507
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1110
hexagon gen pred
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:204
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
print lazy value Lazy Value Info Printer Pass
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:520
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1161
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:747
void clear()
Completely clear the SetVector.
Definition: SetVector.h:216
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:266
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:212
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:410
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:64
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:490
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:268
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:807
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:546
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:453
A vector that has set insertion semantics.
Definition: SetVector.h:41
static use_instr_iterator use_instr_end()
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:261
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:200
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:566
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
#define LLVM_DEBUG(X)
Definition: Debug.h:123
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:693
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static cl::opt< bool > SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable CopyToPhi DAG Mutation"))
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:246
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:435