1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
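//
// In outline, the bottom-up loop implemented below (ListScheduleBottomUp)
// looks roughly like this sketch:
//
//   while (!AvailableQueue->empty() || !Interferences.empty()) {
//     SUnit *SU = PickNodeToScheduleBottomUp(); // may backtrack, clone, or copy
//     AdvancePastStalls(SU);                    // account for latency and hazards
//     ScheduleNodeBottomUp(SU);                 // emit SU and release predecessors
//   }
//   std::reverse(Sequence.begin(), Sequence.end());
//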
17 
18 #include "ScheduleDAGSDNodes.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/MC/MCInstrDesc.h"
42 #include "llvm/MC/MCRegisterInfo.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/CodeGen.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/Debug.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <cstdlib>
54 #include <iterator>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 #include <vector>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "pre-RA-sched"
63 
64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65 STATISTIC(NumUnfolds, "Number of nodes unfolded");
66 STATISTIC(NumDups, "Number of duplicated nodes");
67 STATISTIC(NumPRCopies, "Number of physical register copies");
68 
69 static RegisterScheduler
70  burrListDAGScheduler("list-burr",
71  "Bottom-up register reduction list scheduling",
72  createBURRListDAGScheduler);
73 
74 static RegisterScheduler
75  sourceListDAGScheduler("source",
76  "Similar to list-burr but schedules in source "
77  "order when possible",
78  createSourceListDAGScheduler);
79 
80 static RegisterScheduler
81  hybridListDAGScheduler("list-hybrid",
82  "Bottom-up register pressure aware list scheduling "
83  "which tries to balance latency and register pressure",
84  createHybridListDAGScheduler);
85 
86 static RegisterScheduler
87  ILPListDAGScheduler("list-ilp",
88  "Bottom-up register pressure aware list scheduling "
89  "which tries to balance ILP and register pressure",
90  createILPListDAGScheduler);
91 
92 static cl::opt<bool> DisableSchedCycles(
93  "disable-sched-cycles", cl::Hidden, cl::init(false),
94  cl::desc("Disable cycle-level precision during preRA scheduling"));
95 
96 // Temporary sched=list-ilp flags until the heuristics are robust.
97 // Some options are also available under sched=list-hybrid.
98 static cl::opt<bool> DisableSchedRegPressure(
99  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100  cl::desc("Disable regpressure priority in sched=list-ilp"));
101 static cl::opt<bool> DisableSchedLiveUses(
102  "disable-sched-live-uses", cl::Hidden, cl::init(true),
103  cl::desc("Disable live use priority in sched=list-ilp"));
104 static cl::opt<bool> DisableSchedVRegCycle(
105  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106  cl::desc("Disable virtual register cycle interference checks"));
107 static cl::opt<bool> DisableSchedPhysRegJoin(
108  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109  cl::desc("Disable physreg def-use affinity"));
110 static cl::opt<bool> DisableSchedStalls(
111  "disable-sched-stalls", cl::Hidden, cl::init(true),
112  cl::desc("Disable no-stall priority in sched=list-ilp"));
113 static cl::opt<bool> DisableSchedCriticalPath(
114  "disable-sched-critical-path", cl::Hidden, cl::init(false),
115  cl::desc("Disable critical path priority in sched=list-ilp"));
116 static cl::opt<bool> DisableSchedHeight(
117  "disable-sched-height", cl::Hidden, cl::init(false),
118  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
119 static cl::opt<bool> Disable2AddrHack(
120  "disable-2addr-hack", cl::Hidden, cl::init(true),
121  cl::desc("Disable scheduler's two-address hack"));
122 
123 static cl::opt<int> MaxReorderWindow(
124  "max-sched-reorder", cl::Hidden, cl::init(6),
125  cl::desc("Number of instructions to allow ahead of the critical path "
126  "in sched=list-ilp"));
127 
128 static cl::opt<unsigned> AvgIPC(
129  "sched-avg-ipc", cl::Hidden, cl::init(1),
130  cl::desc("Average inst/cycle when no target itinerary exists."));
131 
132 namespace {
133 
134 //===----------------------------------------------------------------------===//
135 /// ScheduleDAGRRList - The actual register reduction list scheduler
136 /// implementation. This supports both top-down and bottom-up scheduling.
137 ///
138 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139 private:
140  /// NeedLatency - True if the scheduler will make use of latency information.
141  bool NeedLatency;
142 
143  /// AvailableQueue - The priority queue to use for the available SUnits.
144  SchedulingPriorityQueue *AvailableQueue;
145 
146  /// PendingQueue - This contains all of the instructions whose operands have
147  /// been issued, but their results are not ready yet (due to the latency of
148  /// the operation). Once the operands become available, the instruction is
149  /// added to the AvailableQueue.
150  std::vector<SUnit *> PendingQueue;
151 
152  /// HazardRec - The hazard recognizer to use.
153  ScheduleHazardRecognizer *HazardRec;
154 
155  /// CurCycle - The current scheduler state corresponds to this cycle.
156  unsigned CurCycle = 0;
157 
158  /// MinAvailableCycle - Cycle of the soonest available instruction.
159  unsigned MinAvailableCycle;
160 
161  /// IssueCount - Count instructions issued in this cycle
162  /// Currently valid only for bottom-up scheduling.
163  unsigned IssueCount;
164 
165  /// LiveRegDefs - A set of physical registers and their definitions
166  /// that are "live". These nodes must be scheduled before any other nodes that
167  /// modify the registers can be scheduled.
168  unsigned NumLiveRegs;
169  std::unique_ptr<SUnit*[]> LiveRegDefs;
170  std::unique_ptr<SUnit*[]> LiveRegGens;
171 
172  // Collect interferences between physical register use/defs.
173  // Each interference is an SUnit and set of physical registers.
174  SmallVector<SUnit*, 4> Interferences;
175 
176  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177 
178  LRegsMapT LRegsMap;
179 
180  /// Topo - A topological ordering for SUnits which permits fast IsReachable
181  /// and similar queries.
182  ScheduleDAGTopologicalSort Topo;
183 
184  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185  // DAG crawling.
186  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187 
188 public:
189  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190  SchedulingPriorityQueue *availqueue,
191  CodeGenOpt::Level OptLevel)
192  : ScheduleDAGSDNodes(mf),
193  NeedLatency(needlatency), AvailableQueue(availqueue),
194  Topo(SUnits, nullptr) {
195  const TargetSubtargetInfo &STI = mf.getSubtarget();
196  if (DisableSchedCycles || !NeedLatency)
197  HazardRec = new ScheduleHazardRecognizer();
198  else
199  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
200  }
201 
202  ~ScheduleDAGRRList() override {
203  delete HazardRec;
204  delete AvailableQueue;
205  }
206 
207  void Schedule() override;
208 
209  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
210 
211  /// IsReachable - Checks if SU is reachable from TargetSU.
212  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
213  return Topo.IsReachable(SU, TargetSU);
214  }
215 
216  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
217  /// create a cycle.
218  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
219  return Topo.WillCreateCycle(SU, TargetSU);
220  }
221 
222  /// AddPred - adds a predecessor edge to SUnit SU.
223  /// This returns true if this is a new predecessor.
224  /// Updates the topological ordering if required.
225  void AddPred(SUnit *SU, const SDep &D) {
226  Topo.AddPred(SU, D.getSUnit());
227  SU->addPred(D);
228  }
229 
230  /// RemovePred - removes a predecessor edge from SUnit SU.
231  /// This returns true if an edge was removed.
232  /// Updates the topological ordering if required.
233  void RemovePred(SUnit *SU, const SDep &D) {
234  Topo.RemovePred(SU, D.getSUnit());
235  SU->removePred(D);
236  }
237 
238 private:
239  bool isReady(SUnit *SU) {
240  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
241  AvailableQueue->isReady(SU);
242  }
243 
244  void ReleasePred(SUnit *SU, const SDep *PredEdge);
245  void ReleasePredecessors(SUnit *SU);
246  void ReleasePending();
247  void AdvanceToCycle(unsigned NextCycle);
248  void AdvancePastStalls(SUnit *SU);
249  void EmitNode(SUnit *SU);
250  void ScheduleNodeBottomUp(SUnit*);
251  void CapturePred(SDep *PredEdge);
252  void UnscheduleNodeBottomUp(SUnit*);
253  void RestoreHazardCheckerBottomUp();
254  void BacktrackBottomUp(SUnit*, SUnit*);
255  SUnit *TryUnfoldSU(SUnit *);
256  SUnit *CopyAndMoveSuccessors(SUnit*);
257  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
258  const TargetRegisterClass*,
259  const TargetRegisterClass*,
260  SmallVectorImpl<SUnit*>&);
261  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
262 
263  void releaseInterferences(unsigned Reg = 0);
264 
265  SUnit *PickNodeToScheduleBottomUp();
266  void ListScheduleBottomUp();
267 
268  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
269  /// Updates the topological ordering if required.
270  SUnit *CreateNewSUnit(SDNode *N) {
271  unsigned NumSUnits = SUnits.size();
272  SUnit *NewNode = newSUnit(N);
273  // Update the topological ordering.
274  if (NewNode->NodeNum >= NumSUnits)
275  Topo.InitDAGTopologicalSorting();
276  return NewNode;
277  }
278 
279  /// CreateClone - Creates a new SUnit from an existing one.
280  /// Updates the topological ordering if required.
281  SUnit *CreateClone(SUnit *N) {
282  unsigned NumSUnits = SUnits.size();
283  SUnit *NewNode = Clone(N);
284  // Update the topological ordering.
285  if (NewNode->NodeNum >= NumSUnits)
286  Topo.InitDAGTopologicalSorting();
287  return NewNode;
288  }
289 
290  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
291  /// need actual latency information but the hybrid scheduler does.
292  bool forceUnitLatencies() const override {
293  return !NeedLatency;
294  }
295 };
296 
297 } // end anonymous namespace
298 
299 /// GetCostForDef - Looks up the register class and cost for a given definition.
300 /// Typically this just means looking up the representative register class,
301 /// but for untyped values (MVT::Untyped) it means inspecting the node's
302 /// opcode to determine what register class is being generated.
303 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
304  const TargetLowering *TLI,
305  const TargetInstrInfo *TII,
306  const TargetRegisterInfo *TRI,
307  unsigned &RegClass, unsigned &Cost,
308  const MachineFunction &MF) {
309  MVT VT = RegDefPos.GetValue();
310 
311  // Special handling for untyped values. These values can only come from
312  // the expansion of custom DAG-to-DAG patterns.
313  if (VT == MVT::Untyped) {
314  const SDNode *Node = RegDefPos.GetNode();
315 
316  // Special handling for CopyFromReg of untyped values.
317  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
318  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
319  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
320  RegClass = RC->getID();
321  Cost = 1;
322  return;
323  }
324 
325  unsigned Opcode = Node->getMachineOpcode();
326  if (Opcode == TargetOpcode::REG_SEQUENCE) {
327  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
328  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
329  RegClass = RC->getID();
330  Cost = 1;
331  return;
332  }
333 
334  unsigned Idx = RegDefPos.GetIdx();
335  const MCInstrDesc Desc = TII->get(Opcode);
336  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
337  RegClass = RC->getID();
338  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
339  // better way to determine it.
340  Cost = 1;
341  } else {
342  RegClass = TLI->getRepRegClassFor(VT)->getID();
343  Cost = TLI->getRepRegClassCostFor(VT);
344  }
345 }
346 
347 /// Schedule - Schedule the DAG using list scheduling.
348 void ScheduleDAGRRList::Schedule() {
349  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
350  << " '" << BB->getName() << "' **********\n");
351 
352  CurCycle = 0;
353  IssueCount = 0;
354  MinAvailableCycle =
355  DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
356  NumLiveRegs = 0;
357  // Allocate slots for each physical register, plus one for a special register
358  // to track the virtual resource of a calling sequence.
359  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
360  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
361  CallSeqEndForStart.clear();
362  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
363 
364  // Build the scheduling graph.
365  BuildSchedGraph(nullptr);
366 
367  DEBUG(for (SUnit &SU : SUnits)
368  SU.dumpAll(this));
369  Topo.InitDAGTopologicalSorting();
370 
371  AvailableQueue->initNodes(SUnits);
372 
373  HazardRec->Reset();
374 
375  // Execute the actual scheduling loop.
376  ListScheduleBottomUp();
377 
378  AvailableQueue->releaseState();
379 
380  DEBUG({
381  dbgs() << "*** Final schedule ***\n";
382  dumpSchedule();
383  dbgs() << '\n';
384  });
385 }
386 
387 //===----------------------------------------------------------------------===//
388 // Bottom-Up Scheduling
389 //===----------------------------------------------------------------------===//
390 
391 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
392 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
393 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
394  SUnit *PredSU = PredEdge->getSUnit();
395 
396 #ifndef NDEBUG
397  if (PredSU->NumSuccsLeft == 0) {
398  dbgs() << "*** Scheduling failed! ***\n";
399  PredSU->dump(this);
400  dbgs() << " has been released too many times!\n";
401  llvm_unreachable(nullptr);
402  }
403 #endif
404  --PredSU->NumSuccsLeft;
405 
406  if (!forceUnitLatencies()) {
407  // Updating predecessor's height. This is now the cycle when the
408  // predecessor can be scheduled without causing a pipeline stall.
409  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
410  }
411 
412  // If all the node's successors are scheduled, this node is ready
413  // to be scheduled. Ignore the special EntrySU node.
414  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
415  PredSU->isAvailable = true;
416 
417  unsigned Height = PredSU->getHeight();
418  if (Height < MinAvailableCycle)
419  MinAvailableCycle = Height;
420 
421  if (isReady(PredSU)) {
422  AvailableQueue->push(PredSU);
423  }
424  // CapturePred and others may have left the node in the pending queue, avoid
425  // adding it twice.
426  else if (!PredSU->isPending) {
427  PredSU->isPending = true;
428  PendingQueue.push_back(PredSU);
429  }
430  }
431 }
432 
433 /// IsChainDependent - Test if Outer is reachable from Inner through
434 /// chain dependencies.
435 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
436  unsigned NestLevel,
437  const TargetInstrInfo *TII) {
438  SDNode *N = Outer;
439  while (true) {
440  if (N == Inner)
441  return true;
442  // For a TokenFactor, examine each operand. There may be multiple ways
443  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
444  // most nesting in order to ensure that we find the corresponding match.
445  if (N->getOpcode() == ISD::TokenFactor) {
446  for (const SDValue &Op : N->op_values())
447  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
448  return true;
449  return false;
450  }
451  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
452  if (N->isMachineOpcode()) {
453  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
454  ++NestLevel;
455  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
456  if (NestLevel == 0)
457  return false;
458  --NestLevel;
459  }
460  }
461  // Otherwise, find the chain and continue climbing.
462  for (const SDValue &Op : N->op_values())
463  if (Op.getValueType() == MVT::Other) {
464  N = Op.getNode();
465  goto found_chain_operand;
466  }
467  return false;
468  found_chain_operand:;
469  if (N->getOpcode() == ISD::EntryToken)
470  return false;
471  }
472 }
473 
474 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
475 /// the corresponding (lowered) CALLSEQ_BEGIN node.
476 ///
477  /// NestLevel and MaxNest are used in recursion to indicate the current level
478 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
479 /// level seen so far.
480 ///
481 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
482 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
483 static SDNode *
484 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
485  const TargetInstrInfo *TII) {
486  while (true) {
487  // For a TokenFactor, examine each operand. There may be multiple ways
488  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
489  // most nesting in order to ensure that we find the corresponding match.
490  if (N->getOpcode() == ISD::TokenFactor) {
491  SDNode *Best = nullptr;
492  unsigned BestMaxNest = MaxNest;
493  for (const SDValue &Op : N->op_values()) {
494  unsigned MyNestLevel = NestLevel;
495  unsigned MyMaxNest = MaxNest;
496  if (SDNode *New = FindCallSeqStart(Op.getNode(),
497  MyNestLevel, MyMaxNest, TII))
498  if (!Best || (MyMaxNest > BestMaxNest)) {
499  Best = New;
500  BestMaxNest = MyMaxNest;
501  }
502  }
503  assert(Best);
504  MaxNest = BestMaxNest;
505  return Best;
506  }
507  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
508  if (N->isMachineOpcode()) {
509  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
510  ++NestLevel;
511  MaxNest = std::max(MaxNest, NestLevel);
512  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
513  assert(NestLevel != 0);
514  --NestLevel;
515  if (NestLevel == 0)
516  return N;
517  }
518  }
519  // Otherwise, find the chain and continue climbing.
520  for (const SDValue &Op : N->op_values())
521  if (Op.getValueType() == MVT::Other) {
522  N = Op.getNode();
523  goto found_chain_operand;
524  }
525  return nullptr;
526  found_chain_operand:;
527  if (N->getOpcode() == ISD::EntryToken)
528  return nullptr;
529  }
530 }
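// For example (illustrative), with nested call sequences along the chain:
//
//   t1: CALLSEQ_BEGIN (outer)
//   t2:   CALLSEQ_BEGIN (inner)
//   t3:   CALLSEQ_END   (inner)
//   t4: CALLSEQ_END     (outer)
//
// FindCallSeqStart, starting at t4, raises NestLevel to 1 at t4 and to 2 at
// t3; t2 only lowers it back to 1, so the walk does not stop at the inner
// CALLSEQ_BEGIN, and t1 finally lowers it to 0 and is returned as the match.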
531 
532 /// Call ReleasePred for each predecessor, then update register live def/gen.
533 /// Always update LiveRegDefs for a register dependence even if the current SU
534  /// also defines the register. This effectively creates one large live range
535  /// across a sequence of two-address nodes. This is important because the
536 /// entire chain must be scheduled together. Example:
537 ///
538 /// flags = (3) add
539 /// flags = (2) addc flags
540 /// flags = (1) addc flags
541 ///
542 /// results in
543 ///
544 /// LiveRegDefs[flags] = 3
545 /// LiveRegGens[flags] = 1
546 ///
547 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
548 /// interference on flags.
549 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
550  // Bottom up: release predecessors
551  for (SDep &Pred : SU->Preds) {
552  ReleasePred(SU, &Pred);
553  if (Pred.isAssignedRegDep()) {
554  // This is a physical register dependency and it's impossible or
555  // expensive to copy the register. Make sure nothing that can
556  // clobber the register is scheduled between the predecessor and
557  // this node.
558  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
559  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
560  "interference on register dependence");
561  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
562  if (!LiveRegGens[Pred.getReg()]) {
563  ++NumLiveRegs;
564  LiveRegGens[Pred.getReg()] = SU;
565  }
566  }
567  }
568 
569  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
570  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
571  // these nodes, to prevent other calls from being interscheduled with them.
572  unsigned CallResource = TRI->getNumRegs();
573  if (!LiveRegDefs[CallResource])
574  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
575  if (Node->isMachineOpcode() &&
576  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
577  unsigned NestLevel = 0;
578  unsigned MaxNest = 0;
579  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
580  assert(N && "Must find call sequence start");
581 
582  SUnit *Def = &SUnits[N->getNodeId()];
583  CallSeqEndForStart[Def] = SU;
584 
585  ++NumLiveRegs;
586  LiveRegDefs[CallResource] = Def;
587  LiveRegGens[CallResource] = SU;
588  break;
589  }
590 }
591 
592 /// Check to see if any of the pending instructions are ready to issue. If
593 /// so, add them to the available queue.
594 void ScheduleDAGRRList::ReleasePending() {
595  if (DisableSchedCycles) {
596  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
597  return;
598  }
599 
600  // If the available queue is empty, it is safe to reset MinAvailableCycle.
601  if (AvailableQueue->empty())
602  MinAvailableCycle = std::numeric_limits<unsigned>::max();
603 
604  // Check to see if any of the pending instructions are ready to issue. If
605  // so, add them to the available queue.
606  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
607  unsigned ReadyCycle = PendingQueue[i]->getHeight();
608  if (ReadyCycle < MinAvailableCycle)
609  MinAvailableCycle = ReadyCycle;
610 
611  if (PendingQueue[i]->isAvailable) {
612  if (!isReady(PendingQueue[i]))
613  continue;
614  AvailableQueue->push(PendingQueue[i]);
615  }
616  PendingQueue[i]->isPending = false;
617  PendingQueue[i] = PendingQueue.back();
618  PendingQueue.pop_back();
619  --i; --e;
620  }
621 }
622 
623 /// Move the scheduler state forward by the specified number of Cycles.
624 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
625  if (NextCycle <= CurCycle)
626  return;
627 
628  IssueCount = 0;
629  AvailableQueue->setCurCycle(NextCycle);
630  if (!HazardRec->isEnabled()) {
631  // Bypass lots of virtual calls in case of long latency.
632  CurCycle = NextCycle;
633  }
634  else {
635  for (; CurCycle != NextCycle; ++CurCycle) {
636  HazardRec->RecedeCycle();
637  }
638  }
639  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
640  // available Q to release pending nodes at least once before popping.
641  ReleasePending();
642 }
643 
644 /// Move the scheduler state forward until the specified node's dependents are
645 /// ready and can be scheduled with no resource conflicts.
646 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
647  if (DisableSchedCycles)
648  return;
649 
650  // FIXME: Nodes such as CopyFromReg probably should not advance the current
651  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
652  // has predecessors the cycle will be advanced when they are scheduled.
653  // But given the crude nature of modeling latency though such nodes, we
654  // currently need to treat these nodes like real instructions.
655  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
656 
657  unsigned ReadyCycle = SU->getHeight();
658 
659  // Bump CurCycle to account for latency. We assume the latency of other
660  // available instructions may be hidden by the stall (not a full pipe stall).
661  // This updates the hazard recognizer's cycle before reserving resources for
662  // this instruction.
663  AdvanceToCycle(ReadyCycle);
664 
665  // Calls are scheduled in their preceding cycle, so don't conflict with
666  // hazards from instructions after the call. EmitNode will reset the
667  // scoreboard state before emitting the call.
668  if (SU->isCall)
669  return;
670 
671  // FIXME: For resource conflicts in very long non-pipelined stages, we
672  // should probably skip ahead here to avoid useless scoreboard checks.
673  int Stalls = 0;
674  while (true) {
675  ScheduleHazardRecognizer::HazardType HT =
676  HazardRec->getHazardType(SU, -Stalls);
677 
678  if (HT == ScheduleHazardRecognizer::NoHazard)
679  break;
680 
681  ++Stalls;
682  }
683  AdvanceToCycle(CurCycle + Stalls);
684 }
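// Illustrative numbers for AdvancePastStalls above: with CurCycle == 3 and
// SU->getHeight() == 5, AdvanceToCycle(5) first accounts for the latency
// stall; if the hazard recognizer then still reports a hazard when probed at
// 0 and 1 extra stall cycles, Stalls reaches 2 and the cycle advances to 7.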
685 
686 /// Record this SUnit in the HazardRecognizer.
687 /// Does not update CurCycle.
688 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
689  if (!HazardRec->isEnabled())
690  return;
691 
692  // Check for phys reg copy.
693  if (!SU->getNode())
694  return;
695 
696  switch (SU->getNode()->getOpcode()) {
697  default:
698  assert(SU->getNode()->isMachineOpcode() &&
699  "This target-independent node should not be scheduled.");
700  break;
701  case ISD::MERGE_VALUES:
702  case ISD::TokenFactor:
703  case ISD::LIFETIME_START:
704  case ISD::LIFETIME_END:
705  case ISD::CopyToReg:
706  case ISD::CopyFromReg:
707  case ISD::EH_LABEL:
708  // Noops don't affect the scoreboard state. Copies are likely to be
709  // removed.
710  return;
711  case ISD::INLINEASM:
712  // For inline asm, clear the pipeline state.
713  HazardRec->Reset();
714  return;
715  }
716  if (SU->isCall) {
717  // Calls are scheduled with their preceding instructions. For bottom-up
718  // scheduling, clear the pipeline state before emitting.
719  HazardRec->Reset();
720  }
721 
722  HazardRec->EmitInstruction(SU);
723 }
724 
725 static void resetVRegCycle(SUnit *SU);
726 
727 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
728 /// count of its predecessors. If a predecessor pending count is zero, add it to
729 /// the Available queue.
730 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
731  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
732  DEBUG(SU->dump(this));
733 
734 #ifndef NDEBUG
735  if (CurCycle < SU->getHeight())
736  DEBUG(dbgs() << " Height [" << SU->getHeight()
737  << "] pipeline stall!\n");
738 #endif
739 
740  // FIXME: Do not modify node height. It may interfere with
741  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
742  // node its ready cycle can aid heuristics, and after scheduling it can
743  // indicate the scheduled cycle.
744  SU->setHeightToAtLeast(CurCycle);
745 
746  // Reserve resources for the scheduled instruction.
747  EmitNode(SU);
748 
749  Sequence.push_back(SU);
750 
751  AvailableQueue->scheduledNode(SU);
752 
753  // If HazardRec is disabled, and each inst counts as one cycle, then
754  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
755  // PendingQueue for schedulers that implement HasReadyFilter.
756  if (!HazardRec->isEnabled() && AvgIPC < 2)
757  AdvanceToCycle(CurCycle + 1);
758 
759  // Update liveness of predecessors before successors to avoid treating a
760  // two-address node as a live range def.
761  ReleasePredecessors(SU);
762 
763  // Release all the implicit physical register defs that are live.
764  for (SDep &Succ : SU->Succs) {
765  // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
766  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
767  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
768  --NumLiveRegs;
769  LiveRegDefs[Succ.getReg()] = nullptr;
770  LiveRegGens[Succ.getReg()] = nullptr;
771  releaseInterferences(Succ.getReg());
772  }
773  }
774  // Release the special call resource dependence, if this is the beginning
775  // of a call.
776  unsigned CallResource = TRI->getNumRegs();
777  if (LiveRegDefs[CallResource] == SU)
778  for (const SDNode *SUNode = SU->getNode(); SUNode;
779  SUNode = SUNode->getGluedNode()) {
780  if (SUNode->isMachineOpcode() &&
781  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
782  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
783  --NumLiveRegs;
784  LiveRegDefs[CallResource] = nullptr;
785  LiveRegGens[CallResource] = nullptr;
786  releaseInterferences(CallResource);
787  }
788  }
789 
790  resetVRegCycle(SU);
791 
792  SU->isScheduled = true;
793 
794  // Conditions under which the scheduler should eagerly advance the cycle:
795  // (1) No available instructions
796  // (2) All pipelines full, so available instructions must have hazards.
797  //
798  // If HazardRec is disabled, the cycle was pre-advanced before calling
799  // ReleasePredecessors. In that case, IssueCount should remain 0.
800  //
801  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
802  if (HazardRec->isEnabled() || AvgIPC > 1) {
803  if (SU->getNode() && SU->getNode()->isMachineOpcode())
804  ++IssueCount;
805  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
806  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
807  AdvanceToCycle(CurCycle + 1);
808  }
809 }
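// For example, in ScheduleNodeBottomUp above with the hazard recognizer
// disabled and sched-avg-ipc=2: the cycle is not pre-advanced before
// ReleasePredecessors; instead IssueCount is bumped for each machine
// instruction and the cycle only advances once IssueCount reaches AvgIPC,
// i.e. after every second instruction issued.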
810 
811 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
812 /// unscheduled, increase the succ left count of its predecessors. Remove
813 /// them from AvailableQueue if necessary.
814 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
815  SUnit *PredSU = PredEdge->getSUnit();
816  if (PredSU->isAvailable) {
817  PredSU->isAvailable = false;
818  if (!PredSU->isPending)
819  AvailableQueue->remove(PredSU);
820  }
821 
822  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
823  "NumSuccsLeft will overflow!");
824  ++PredSU->NumSuccsLeft;
825 }
826 
827  /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
828  /// state and the states of its predecessors to reflect the change.
829 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
830  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
831  DEBUG(SU->dump(this));
832 
833  for (SDep &Pred : SU->Preds) {
834  CapturePred(&Pred);
835  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
836  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
837  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
838  "Physical register dependency violated?");
839  --NumLiveRegs;
840  LiveRegDefs[Pred.getReg()] = nullptr;
841  LiveRegGens[Pred.getReg()] = nullptr;
842  releaseInterferences(Pred.getReg());
843  }
844  }
845 
846  // Reclaim the special call resource dependence, if this is the beginning
847  // of a call.
848  unsigned CallResource = TRI->getNumRegs();
849  for (const SDNode *SUNode = SU->getNode(); SUNode;
850  SUNode = SUNode->getGluedNode()) {
851  if (SUNode->isMachineOpcode() &&
852  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
853  SUnit *SeqEnd = CallSeqEndForStart[SU];
854  assert(SeqEnd && "Call sequence start/end must be known");
855  assert(!LiveRegDefs[CallResource]);
856  assert(!LiveRegGens[CallResource]);
857  ++NumLiveRegs;
858  LiveRegDefs[CallResource] = SU;
859  LiveRegGens[CallResource] = SeqEnd;
860  }
861  }
862 
863  // Release the special call resource dependence, if this is the end
864  // of a call.
865  if (LiveRegGens[CallResource] == SU)
866  for (const SDNode *SUNode = SU->getNode(); SUNode;
867  SUNode = SUNode->getGluedNode()) {
868  if (SUNode->isMachineOpcode() &&
869  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
870  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
871  assert(LiveRegDefs[CallResource]);
872  assert(LiveRegGens[CallResource]);
873  --NumLiveRegs;
874  LiveRegDefs[CallResource] = nullptr;
875  LiveRegGens[CallResource] = nullptr;
876  releaseInterferences(CallResource);
877  }
878  }
879 
880  for (auto &Succ : SU->Succs) {
881  if (Succ.isAssignedRegDep()) {
882  auto Reg = Succ.getReg();
883  if (!LiveRegDefs[Reg])
884  ++NumLiveRegs;
885  // This becomes the nearest def. Note that an earlier def may still be
886  // pending if this is a two-address node.
887  LiveRegDefs[Reg] = SU;
888 
889  // Update LiveRegGens only if it was empty before this unscheduling.
890  // This avoids incorrectly overwriting an entry set by a previous run.
891  if (!LiveRegGens[Reg]) {
892  // Find the successor with the lowest height.
893  LiveRegGens[Reg] = Succ.getSUnit();
894  for (auto &Succ2 : SU->Succs) {
895  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
896  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
897  LiveRegGens[Reg] = Succ2.getSUnit();
898  }
899  }
900  }
901  }
902  if (SU->getHeight() < MinAvailableCycle)
903  MinAvailableCycle = SU->getHeight();
904 
905  SU->setHeightDirty();
906  SU->isScheduled = false;
907  SU->isAvailable = true;
908  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
909  // Don't make available until backtracking is complete.
910  SU->isPending = true;
911  PendingQueue.push_back(SU);
912  }
913  else {
914  AvailableQueue->push(SU);
915  }
916  AvailableQueue->unscheduledNode(SU);
917 }
918 
919 /// After backtracking, the hazard checker needs to be restored to a state
920  /// corresponding to the current cycle.
921 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
922  HazardRec->Reset();
923 
924  unsigned LookAhead = std::min((unsigned)Sequence.size(),
925  HazardRec->getMaxLookAhead());
926  if (LookAhead == 0)
927  return;
928 
929  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
930  unsigned HazardCycle = (*I)->getHeight();
931  for (auto E = Sequence.end(); I != E; ++I) {
932  SUnit *SU = *I;
933  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
934  HazardRec->RecedeCycle();
935  }
936  EmitNode(SU);
937  }
938 }
939 
940  /// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
941  /// including BtSU, so that SU can be scheduled.
942 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
943  SUnit *OldSU = Sequence.back();
944  while (true) {
945  Sequence.pop_back();
946  // FIXME: use ready cycle instead of height
947  CurCycle = OldSU->getHeight();
948  UnscheduleNodeBottomUp(OldSU);
949  AvailableQueue->setCurCycle(CurCycle);
950  if (OldSU == BtSU)
951  break;
952  OldSU = Sequence.back();
953  }
954 
955  assert(!SU->isSucc(OldSU) && "Something is wrong!");
956 
957  RestoreHazardCheckerBottomUp();
958 
959  ReleasePending();
960 
961  ++NumBacktracks;
962 }
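// Illustrative scenario for BacktrackBottomUp above: when SU interferes with a
// live physical register (say EFLAGS) whose LiveRegGens entry is the
// already-scheduled BtSU, nodes are popped off Sequence and unscheduled one by
// one (rewinding CurCycle to each node's height) until BtSU itself has been
// unscheduled; the hazard recognizer is then replayed and pending nodes are
// re-released so scheduling can continue with the backtracked nodes available.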
963 
964 static bool isOperandOf(const SUnit *SU, SDNode *N) {
965  for (const SDNode *SUNode = SU->getNode(); SUNode;
966  SUNode = SUNode->getGluedNode()) {
967  if (SUNode->isOperandOf(N))
968  return true;
969  }
970  return false;
971 }
972 
973  /// TryUnfoldSU - Attempt to unfold SU's folded memory operand into separate load and operation nodes.
974 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
975  SDNode *N = SU->getNode();
976  // Use while over if to ease fall through.
977  SmallVector<SDNode *, 2> NewNodes;
978  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
979  return nullptr;
980 
981  // Unfolding an x86 DEC64m operation results in a store, dec, and load,
982  // which can't be handled here, so just bail out.
983  if (NewNodes.size() == 3)
984  return nullptr;
985 
986  assert(NewNodes.size() == 2 && "Expected a load folding node!");
987 
988  N = NewNodes[1];
989  SDNode *LoadNode = NewNodes[0];
990  unsigned NumVals = N->getNumValues();
991  unsigned OldNumVals = SU->getNode()->getNumValues();
992 
993  // LoadNode may already exist. This can happen when there is another
994  // load from the same location that produces the same type of value,
995  // but with different alignment or volatility.
996  bool isNewLoad = true;
997  SUnit *LoadSU;
998  if (LoadNode->getNodeId() != -1) {
999  LoadSU = &SUnits[LoadNode->getNodeId()];
1000  // If LoadSU has already been scheduled, we should clone it but
1001  // this would negate the benefit of unfolding, so just return SU.
1002  if (LoadSU->isScheduled)
1003  return SU;
1004  isNewLoad = false;
1005  } else {
1006  LoadSU = CreateNewSUnit(LoadNode);
1007  LoadNode->setNodeId(LoadSU->NodeNum);
1008 
1009  InitNumRegDefsLeft(LoadSU);
1010  computeLatency(LoadSU);
1011  }
1012 
1013  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1014 
1015  // Now that we are committed to unfolding, replace the DAG uses.
1016  for (unsigned i = 0; i != NumVals; ++i)
1017  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1018  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1019  SDValue(LoadNode, 1));
1020 
1021  SUnit *NewSU = CreateNewSUnit(N);
1022  assert(N->getNodeId() == -1 && "Node already inserted!");
1023  N->setNodeId(NewSU->NodeNum);
1024 
1025  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1026  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1027  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1028  NewSU->isTwoAddress = true;
1029  break;
1030  }
1031  }
1032  if (MCID.isCommutable())
1033  NewSU->isCommutable = true;
1034 
1035  InitNumRegDefsLeft(NewSU);
1036  computeLatency(NewSU);
1037 
1038  // Record all the edges to and from the old SU, by category.
1039  SmallVector<SDep, 4> ChainPreds;
1040  SmallVector<SDep, 4> ChainSuccs;
1041  SmallVector<SDep, 4> LoadPreds;
1042  SmallVector<SDep, 4> NodePreds;
1043  SmallVector<SDep, 4> NodeSuccs;
1044  for (SDep &Pred : SU->Preds) {
1045  if (Pred.isCtrl())
1046  ChainPreds.push_back(Pred);
1047  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1048  LoadPreds.push_back(Pred);
1049  else
1050  NodePreds.push_back(Pred);
1051  }
1052  for (SDep &Succ : SU->Succs) {
1053  if (Succ.isCtrl())
1054  ChainSuccs.push_back(Succ);
1055  else
1056  NodeSuccs.push_back(Succ);
1057  }
1058 
1059  // Now assign edges to the newly-created nodes.
1060  for (const SDep &Pred : ChainPreds) {
1061  RemovePred(SU, Pred);
1062  if (isNewLoad)
1063  AddPred(LoadSU, Pred);
1064  }
1065  for (const SDep &Pred : LoadPreds) {
1066  RemovePred(SU, Pred);
1067  if (isNewLoad)
1068  AddPred(LoadSU, Pred);
1069  }
1070  for (const SDep &Pred : NodePreds) {
1071  RemovePred(SU, Pred);
1072  AddPred(NewSU, Pred);
1073  }
1074  for (SDep D : NodeSuccs) {
1075  SUnit *SuccDep = D.getSUnit();
1076  D.setSUnit(SU);
1077  RemovePred(SuccDep, D);
1078  D.setSUnit(NewSU);
1079  AddPred(SuccDep, D);
1080  // Balance register pressure.
1081  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1082  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1083  --NewSU->NumRegDefsLeft;
1084  }
1085  for (SDep D : ChainSuccs) {
1086  SUnit *SuccDep = D.getSUnit();
1087  D.setSUnit(SU);
1088  RemovePred(SuccDep, D);
1089  if (isNewLoad) {
1090  D.setSUnit(LoadSU);
1091  AddPred(SuccDep, D);
1092  }
1093  }
1094 
1095  // Add a data dependency to reflect that NewSU reads the value defined
1096  // by LoadSU.
1097  SDep D(LoadSU, SDep::Data, 0);
1098  D.setLatency(LoadSU->Latency);
1099  AddPred(NewSU, D);
1100 
1101  if (isNewLoad)
1102  AvailableQueue->addNode(LoadSU);
1103  AvailableQueue->addNode(NewSU);
1104 
1105  ++NumUnfolds;
1106 
1107  if (NewSU->NumSuccsLeft == 0)
1108  NewSU->isAvailable = true;
1109 
1110  return NewSU;
1111 }
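// For example (x86, illustrative): a load-folded node such as ADD32rm can be
// unfolded by unfoldMemoryOperand into a separate load (MOV32rm -> LoadSU) and
// the register form of the operation (ADD32rr -> NewSU), with the data edge
// from LoadSU to NewSU added above, so the two pieces can then be scheduled
// (or cloned by CopyAndMoveSuccessors) independently.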
1112 
1113 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1114 /// successors to the newly created node.
1115 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1116  SDNode *N = SU->getNode();
1117  if (!N)
1118  return nullptr;
1119 
1120  if (SU->getNode()->getGluedNode())
1121  return nullptr;
1122 
1123  SUnit *NewSU;
1124  bool TryUnfold = false;
1125  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1126  MVT VT = N->getSimpleValueType(i);
1127  if (VT == MVT::Glue)
1128  return nullptr;
1129  else if (VT == MVT::Other)
1130  TryUnfold = true;
1131  }
1132  for (const SDValue &Op : N->op_values()) {
1133  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1134  if (VT == MVT::Glue)
1135  return nullptr;
1136  }
1137 
1138  // If possible unfold instruction.
1139  if (TryUnfold) {
1140  SUnit *UnfoldSU = TryUnfoldSU(SU);
1141  if (!UnfoldSU)
1142  return nullptr;
1143  SU = UnfoldSU;
1144  N = SU->getNode();
1145  // If this can be scheduled, don't bother duplicating; just return it.
1146  if (SU->NumSuccsLeft == 0)
1147  return SU;
1148  }
1149 
1150  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1151  NewSU = CreateClone(SU);
1152 
1153  // New SUnit has the exact same predecessors.
1154  for (SDep &Pred : SU->Preds)
1155  if (!Pred.isArtificial())
1156  AddPred(NewSU, Pred);
1157 
1158  // Only copy scheduled successors. Cut them from old node's successor
1159  // list and move them over.
1160  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1161  for (SDep &Succ : SU->Succs) {
1162  if (Succ.isArtificial())
1163  continue;
1164  SUnit *SuccSU = Succ.getSUnit();
1165  if (SuccSU->isScheduled) {
1166  SDep D = Succ;
1167  D.setSUnit(NewSU);
1168  AddPred(SuccSU, D);
1169  D.setSUnit(SU);
1170  DelDeps.push_back(std::make_pair(SuccSU, D));
1171  }
1172  }
1173  for (auto &DelDep : DelDeps)
1174  RemovePred(DelDep.first, DelDep.second);
1175 
1176  AvailableQueue->updateNode(SU);
1177  AvailableQueue->addNode(NewSU);
1178 
1179  ++NumDups;
1180  return NewSU;
1181 }
1182 
1183 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1184 /// scheduled successors of the given SUnit to the last copy.
1185 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1186  const TargetRegisterClass *DestRC,
1187  const TargetRegisterClass *SrcRC,
1188  SmallVectorImpl<SUnit*> &Copies) {
1189  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1190  CopyFromSU->CopySrcRC = SrcRC;
1191  CopyFromSU->CopyDstRC = DestRC;
1192 
1193  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1194  CopyToSU->CopySrcRC = DestRC;
1195  CopyToSU->CopyDstRC = SrcRC;
1196 
1197  // Only copy scheduled successors. Cut them from old node's successor
1198  // list and move them over.
1199  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1200  for (SDep &Succ : SU->Succs) {
1201  if (Succ.isArtificial())
1202  continue;
1203  SUnit *SuccSU = Succ.getSUnit();
1204  if (SuccSU->isScheduled) {
1205  SDep D = Succ;
1206  D.setSUnit(CopyToSU);
1207  AddPred(SuccSU, D);
1208  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1209  }
1210  else {
1211  // Avoid scheduling the def-side copy before other successors. Otherwise
1212  // we could introduce another physreg interference on the copy and
1213  // continue inserting copies indefinitely.
1214  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1215  }
1216  }
1217  for (auto &DelDep : DelDeps)
1218  RemovePred(DelDep.first, DelDep.second);
1219 
1220  SDep FromDep(SU, SDep::Data, Reg);
1221  FromDep.setLatency(SU->Latency);
1222  AddPred(CopyFromSU, FromDep);
1223  SDep ToDep(CopyFromSU, SDep::Data, 0);
1224  ToDep.setLatency(CopyFromSU->Latency);
1225  AddPred(CopyToSU, ToDep);
1226 
1227  AvailableQueue->updateNode(SU);
1228  AvailableQueue->addNode(CopyFromSU);
1229  AvailableQueue->addNode(CopyToSU);
1230  Copies.push_back(CopyFromSU);
1231  Copies.push_back(CopyToSU);
1232 
1233  ++NumPRCopies;
1234 }
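// Conceptually (illustrative), for a value in physical register Reg of class
// SrcRC, InsertCopiesAndMoveSuccs above rewrites
//
//   SU --Reg--> {already-scheduled successors}
// into
//   SU --Reg--> CopyFromSU (SrcRC -> DestRC) --> CopyToSU (DestRC -> SrcRC)
//                 --> {already-scheduled successors}
//
// so the value can survive in the cross-copy register class while Reg is
// needed for something else.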
1235 
1236 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1237 /// definition of the specified node.
1238 /// FIXME: Move to SelectionDAG?
1239 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1240  const TargetInstrInfo *TII) {
1241  unsigned NumRes;
1242  if (N->getOpcode() == ISD::CopyFromReg) {
1243  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1244  NumRes = 1;
1245  } else {
1246  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1247  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1248  NumRes = MCID.getNumDefs();
1249  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1250  if (Reg == *ImpDef)
1251  break;
1252  ++NumRes;
1253  }
1254  }
1255  return N->getSimpleValueType(NumRes);
1256 }
1257 
1258  /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
1259  /// specified register def of the specified SUnit clobbers any "live" registers.
1260 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1261  SUnit **LiveRegDefs,
1262  SmallSet<unsigned, 4> &RegAdded,
1263  SmallVectorImpl<unsigned> &LRegs,
1264  const TargetRegisterInfo *TRI) {
1265  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1266 
1267  // Check if this alias of Reg is live.
1268  if (!LiveRegDefs[*AliasI]) continue;
1269 
1270  // Allow multiple uses of the same def.
1271  if (LiveRegDefs[*AliasI] == SU) continue;
1272 
1273  // Add Reg to the set of interfering live regs.
1274  if (RegAdded.insert(*AliasI).second) {
1275  LRegs.push_back(*AliasI);
1276  }
1277  }
1278 }
1279 
1280 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1281 /// by RegMask, and add them to LRegs.
1282 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1283  ArrayRef<SUnit*> LiveRegDefs,
1284  SmallSet<unsigned, 4> &RegAdded,
1285  SmallVectorImpl<unsigned> &LRegs) {
1286  // Look at all live registers. Skip Reg0 and the special CallResource.
1287  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1288  if (!LiveRegDefs[i]) continue;
1289  if (LiveRegDefs[i] == SU) continue;
1290  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1291  if (RegAdded.insert(i).second)
1292  LRegs.push_back(i);
1293  }
1294 }
1295 
1296 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1297 static const uint32_t *getNodeRegMask(const SDNode *N) {
1298  for (const SDValue &Op : N->op_values())
1299  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1300  return RegOp->getRegMask();
1301  return nullptr;
1302 }
1303 
1304 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1305 /// scheduling of the given node to satisfy live physical register dependencies.
1306 /// If the specific node is the last one that's available to schedule, do
1307 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1308 bool ScheduleDAGRRList::
1309 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1310  if (NumLiveRegs == 0)
1311  return false;
1312 
1313  SmallSet<unsigned, 4> RegAdded;
1314  // If this node would clobber any "live" register, then it's not ready.
1315  //
1316  // If SU is the currently live definition of the same register that it uses,
1317  // then we are free to schedule it.
1318  for (SDep &Pred : SU->Preds) {
1319  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1320  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1321  RegAdded, LRegs, TRI);
1322  }
1323 
1324  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1325  if (Node->getOpcode() == ISD::INLINEASM) {
1326  // Inline asm can clobber physical defs.
1327  unsigned NumOps = Node->getNumOperands();
1328  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1329  --NumOps; // Ignore the glue operand.
1330 
1331  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1332  unsigned Flags =
1333  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1334  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1335 
1336  ++i; // Skip the ID value.
1337  if (InlineAsm::isRegDefKind(Flags) ||
1338  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1339  InlineAsm::isClobberKind(Flags)) {
1340  // Check for def of register or earlyclobber register.
1341  for (; NumVals; --NumVals, ++i) {
1342  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1343  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1344  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1345  }
1346  } else
1347  i += NumVals;
1348  }
1349  continue;
1350  }
1351 
1352  if (!Node->isMachineOpcode())
1353  continue;
1354  // If we're in the middle of scheduling a call, don't begin scheduling
1355  // another call. Also, don't allow any physical registers to be live across
1356  // the call.
1357  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1358  // Check the special calling-sequence resource.
1359  unsigned CallResource = TRI->getNumRegs();
1360  if (LiveRegDefs[CallResource]) {
1361  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1362  while (SDNode *Glued = Gen->getGluedNode())
1363  Gen = Glued;
1364  if (!IsChainDependent(Gen, Node, 0, TII) &&
1365  RegAdded.insert(CallResource).second)
1366  LRegs.push_back(CallResource);
1367  }
1368  }
1369  if (const uint32_t *RegMask = getNodeRegMask(Node))
1370  CheckForLiveRegDefMasked(SU, RegMask,
1371  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1372  RegAdded, LRegs);
1373 
1374  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1375  if (MCID.hasOptionalDef()) {
1376  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1377  // This operand can be either a def of CPSR, if the S bit is set; or a use
1378  // of %noreg. When the OptionalDef is set to a valid register, we need to
1379  // handle it in the same way as an ImplicitDef.
1380  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1381  if (MCID.OpInfo[i].isOptionalDef()) {
1382  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1383  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1384  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1385  }
1386  }
1387  if (!MCID.ImplicitDefs)
1388  continue;
1389  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1390  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1391  }
1392 
1393  return !LRegs.empty();
1394 }
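// Illustrative example (x86) for DelayForLiveRegsBottomUp above: suppose
// LiveRegDefs[EFLAGS] is set because an already-scheduled user still needs the
// flags produced by some other node. A candidate SU whose instruction
// implicitly defines EFLAGS would clobber that live value, so EFLAGS is added
// to LRegs here and SU is delayed until the flags live range has been closed.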
1395 
1396 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1397  // Add the nodes that aren't ready back onto the available list.
1398  for (unsigned i = Interferences.size(); i > 0; --i) {
1399  SUnit *SU = Interferences[i-1];
1400  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1401  if (Reg) {
1402  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1403  if (!is_contained(LRegs, Reg))
1404  continue;
1405  }
1406  SU->isPending = false;
1407  // The interfering node may no longer be available due to backtracking.
1408  // Furthermore, it may have been made available again, in which case it is
1409  // now already in the AvailableQueue.
1410  if (SU->isAvailable && !SU->NodeQueueId) {
1411  DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1412  AvailableQueue->push(SU);
1413  }
1414  if (i < Interferences.size())
1415  Interferences[i-1] = Interferences.back();
1416  Interferences.pop_back();
1417  LRegsMap.erase(LRegsPos);
1418  }
1419 }
1420 
1421 /// Return a node that can be scheduled in this cycle. Requirements:
1422 /// (1) Ready: latency has been satisfied
1423 /// (2) No Hazards: resources are available
1424 /// (3) No Interferences: may unschedule to break register interferences.
1425 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1426  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1427  auto FindAvailableNode = [&]() {
1428  while (CurSU) {
1429  SmallVector<unsigned, 4> LRegs;
1430  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1431  break;
1432  DEBUG(dbgs() << " Interfering reg ";
1433  if (LRegs[0] == TRI->getNumRegs())
1434  dbgs() << "CallResource";
1435  else
1436  dbgs() << printReg(LRegs[0], TRI);
1437  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1438  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1439  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1440  if (LRegsPair.second) {
1441  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1442  Interferences.push_back(CurSU);
1443  }
1444  else {
1445  assert(CurSU->isPending && "Interferences are pending");
1446  // Update the interference with current live regs.
1447  LRegsPair.first->second = LRegs;
1448  }
1449  CurSU = AvailableQueue->pop();
1450  }
1451  };
1452  FindAvailableNode();
1453  if (CurSU)
1454  return CurSU;
1455 
1456  // All candidates are delayed due to live physical reg dependencies.
1457  // Try backtracking, code duplication, or inserting cross class copies
1458  // to resolve it.
1459  for (SUnit *TrySU : Interferences) {
1460  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1461 
1462  // Try unscheduling up to the point where it's safe to schedule
1463  // this node.
1464  SUnit *BtSU = nullptr;
1465  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1466  for (unsigned Reg : LRegs) {
1467  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1468  BtSU = LiveRegGens[Reg];
1469  LiveCycle = BtSU->getHeight();
1470  }
1471  }
1472  if (!WillCreateCycle(TrySU, BtSU)) {
1473  // BacktrackBottomUp mutates Interferences!
1474  BacktrackBottomUp(TrySU, BtSU);
1475 
1476  // Force the current node to be scheduled before the node that
1477  // requires the physical reg dep.
1478  if (BtSU->isAvailable) {
1479  BtSU->isAvailable = false;
1480  if (!BtSU->isPending)
1481  AvailableQueue->remove(BtSU);
1482  }
1483  DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1484  << TrySU->NodeNum << ")\n");
1485  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1486 
1487  // If one or more successors has been unscheduled, then the current
1488  // node is no longer available.
1489  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1490  DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1491  CurSU = AvailableQueue->pop();
1492  } else {
1493  DEBUG(dbgs() << "TrySU available\n");
1494  // Available and in AvailableQueue
1495  AvailableQueue->remove(TrySU);
1496  CurSU = TrySU;
1497  }
1498  FindAvailableNode();
1499  // Interferences has been mutated. We must break.
1500  break;
1501  }
1502  }
1503 
1504  if (!CurSU) {
1505  // Can't backtrack. If it's too expensive to copy the value, then try to
1506  // duplicate the nodes that produce these "too expensive to copy"
1507  // values to break the dependency. In case even that doesn't work,
1508  // insert cross class copies.
1509  // If it's not too expensive, i.e. cost != -1, issue copies.
1510  SUnit *TrySU = Interferences[0];
1511  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1512  assert(LRegs.size() == 1 && "Can't handle this yet!");
1513  unsigned Reg = LRegs[0];
1514  SUnit *LRDef = LiveRegDefs[Reg];
1515  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1516  const TargetRegisterClass *RC =
1517  TRI->getMinimalPhysRegClass(Reg, VT);
1518  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1519 
1520  // If the cross copy register class is the same as RC, then it must be
1521  // possible to copy the value directly. Do not try to duplicate the def.
1522  // If the cross copy register class is not the same as RC, then it's possible
1523  // to copy the value, but it requires cross register class copies and it is
1524  // expensive.
1525  // If the cross copy register class is null, then it's not possible to copy
1526  // the value at all.
1527  SUnit *NewDef = nullptr;
1528  if (DestRC != RC) {
1529  NewDef = CopyAndMoveSuccessors(LRDef);
1530  if (!DestRC && !NewDef)
1531  report_fatal_error("Can't handle live physical register dependency!");
1532  }
1533  if (!NewDef) {
1534  // Issue copies, these can be expensive cross register class copies.
1535  SmallVector<SUnit*, 2> Copies;
1536  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1537  DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1538  << " to SU #" << Copies.front()->NodeNum << "\n");
1539  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1540  NewDef = Copies.back();
1541  }
1542 
1543  DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1544  << " to SU #" << TrySU->NodeNum << "\n");
1545  LiveRegDefs[Reg] = NewDef;
1546  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1547  TrySU->isAvailable = false;
1548  CurSU = NewDef;
1549  }
1550  assert(CurSU && "Unable to resolve live physical register dependencies!");
1551  return CurSU;
1552 }
1553 
1554 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1555 /// schedulers.
1556 void ScheduleDAGRRList::ListScheduleBottomUp() {
1557  // Release any predecessors of the special Exit node.
1558  ReleasePredecessors(&ExitSU);
1559 
1560  // Add root to Available queue.
1561  if (!SUnits.empty()) {
1562  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1563  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1564  RootSU->isAvailable = true;
1565  AvailableQueue->push(RootSU);
1566  }
1567 
1568  // While Available queue is not empty, grab the node with the highest
1569  // priority. If it is not ready put it back. Schedule the node.
1570  Sequence.reserve(SUnits.size());
1571  while (!AvailableQueue->empty() || !Interferences.empty()) {
1572  DEBUG(dbgs() << "\nExamining Available:\n";
1573  AvailableQueue->dump(this));
1574 
1575  // Pick the best node to schedule taking all constraints into
1576  // consideration.
1577  SUnit *SU = PickNodeToScheduleBottomUp();
1578 
1579  AdvancePastStalls(SU);
1580 
1581  ScheduleNodeBottomUp(SU);
1582 
1583  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1584  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1585  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1586  "MinAvailableCycle uninitialized");
1587  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1588  }
1589  }
1590 
1591  // Reverse the order since this is bottom-up scheduling.
1592  std::reverse(Sequence.begin(), Sequence.end());
1593 
1594 #ifndef NDEBUG
1595  VerifyScheduledSequence(/*isBottomUp=*/true);
1596 #endif
1597 }
1598 
1599 namespace {
1600 
1601 class RegReductionPQBase;
1602 
1603 struct queue_sort {
1604  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1605 };
1606 
1607 #ifndef NDEBUG
1608 template<class SF>
1609 struct reverse_sort : public queue_sort {
1610  SF &SortFunc;
1611 
1612  reverse_sort(SF &sf) : SortFunc(sf) {}
1613 
1614  bool operator()(SUnit* left, SUnit* right) const {
1615  // reverse left/right rather than simply !SortFunc(left, right)
1616  // to expose different paths in the comparison logic.
1617  return SortFunc(right, left);
1618  }
1619 };
1620 #endif // NDEBUG
1621 
1622 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1623 // reduction scheduler.
1624 struct bu_ls_rr_sort : public queue_sort {
1625  enum {
1626  IsBottomUp = true,
1627  HasReadyFilter = false
1628  };
1629 
1630  RegReductionPQBase *SPQ;
1631 
1632  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1633 
1634  bool operator()(SUnit* left, SUnit* right) const;
1635 };
1636 
1637 // src_ls_rr_sort - Priority function for source order scheduler.
1638 struct src_ls_rr_sort : public queue_sort {
1639  enum {
1640  IsBottomUp = true,
1641  HasReadyFilter = false
1642  };
1643 
1644  RegReductionPQBase *SPQ;
1645 
1646  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1647 
1648  bool operator()(SUnit* left, SUnit* right) const;
1649 };
1650 
1651 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1652 struct hybrid_ls_rr_sort : public queue_sort {
1653  enum {
1654  IsBottomUp = true,
1655  HasReadyFilter = false
1656  };
1657 
1658  RegReductionPQBase *SPQ;
1659 
1660  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1661 
1662  bool isReady(SUnit *SU, unsigned CurCycle) const;
1663 
1664  bool operator()(SUnit* left, SUnit* right) const;
1665 };
1666 
1667 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1668 // scheduler.
1669 struct ilp_ls_rr_sort : public queue_sort {
1670  enum {
1671  IsBottomUp = true,
1672  HasReadyFilter = false
1673  };
1674 
1675  RegReductionPQBase *SPQ;
1676 
1677  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1678 
1679  bool isReady(SUnit *SU, unsigned CurCycle) const;
1680 
1681  bool operator()(SUnit* left, SUnit* right) const;
1682 };
1683 
1684 class RegReductionPQBase : public SchedulingPriorityQueue {
1685 protected:
1686  std::vector<SUnit *> Queue;
1687  unsigned CurQueueId = 0;
1688  bool TracksRegPressure;
1689  bool SrcOrder;
1690 
1691  // SUnits - The SUnits for the current graph.
1692  std::vector<SUnit> *SUnits;
1693 
1694  MachineFunction &MF;
1695  const TargetInstrInfo *TII;
1696  const TargetRegisterInfo *TRI;
1697  const TargetLowering *TLI;
1698  ScheduleDAGRRList *scheduleDAG = nullptr;
1699 
1700  // SethiUllmanNumbers - The SethiUllman number for each node.
1701  std::vector<unsigned> SethiUllmanNumbers;
1702 
1703  /// RegPressure - Tracking current reg pressure per register class.
1704  std::vector<unsigned> RegPressure;
1705 
1706  /// RegLimit - Tracking the number of allocatable registers per register
1707  /// class.
1708  std::vector<unsigned> RegLimit;
1709 
1710 public:
1711  RegReductionPQBase(MachineFunction &mf,
1712  bool hasReadyFilter,
1713  bool tracksrp,
1714  bool srcorder,
1715  const TargetInstrInfo *tii,
1716  const TargetRegisterInfo *tri,
1717  const TargetLowering *tli)
1718  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1719  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1720  if (TracksRegPressure) {
1721  unsigned NumRC = TRI->getNumRegClasses();
1722  RegLimit.resize(NumRC);
1723  RegPressure.resize(NumRC);
1724  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1725  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1726  for (const TargetRegisterClass *RC : TRI->regclasses())
1727  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1728  }
1729  }
1730 
1731  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1732  scheduleDAG = scheduleDag;
1733  }
1734 
1735  ScheduleHazardRecognizer* getHazardRec() {
1736  return scheduleDAG->getHazardRec();
1737  }
1738 
1739  void initNodes(std::vector<SUnit> &sunits) override;
1740 
1741  void addNode(const SUnit *SU) override;
1742 
1743  void updateNode(const SUnit *SU) override;
1744 
1745  void releaseState() override {
1746  SUnits = nullptr;
1747  SethiUllmanNumbers.clear();
1748  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1749  }
1750 
1751  unsigned getNodePriority(const SUnit *SU) const;
1752 
1753  unsigned getNodeOrdering(const SUnit *SU) const {
1754  if (!SU->getNode()) return 0;
1755 
1756  return SU->getNode()->getIROrder();
1757  }
1758 
1759  bool empty() const override { return Queue.empty(); }
1760 
1761  void push(SUnit *U) override {
1762  assert(!U->NodeQueueId && "Node in the queue already");
1763  U->NodeQueueId = ++CurQueueId;
1764  Queue.push_back(U);
1765  }
1766 
1767  void remove(SUnit *SU) override {
1768  assert(!Queue.empty() && "Queue is empty!");
1769  assert(SU->NodeQueueId != 0 && "Not in queue!");
1770  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1771  if (I != std::prev(Queue.end()))
1772  std::swap(*I, Queue.back());
1773  Queue.pop_back();
1774  SU->NodeQueueId = 0;
1775  }
1776 
1777  bool tracksRegPressure() const override { return TracksRegPressure; }
1778 
1779  void dumpRegPressure() const;
1780 
1781  bool HighRegPressure(const SUnit *SU) const;
1782 
1783  bool MayReduceRegPressure(SUnit *SU) const;
1784 
1785  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1786 
1787  void scheduledNode(SUnit *SU) override;
1788 
1789  void unscheduledNode(SUnit *SU) override;
1790 
1791 protected:
1792  bool canClobber(const SUnit *SU, const SUnit *Op);
1793  void AddPseudoTwoAddrDeps();
1794  void PrescheduleNodesWithMultipleUses();
1795  void CalculateSethiUllmanNumbers();
1796 };
1797 
1798 template<class SF>
1799 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1800  std::vector<SUnit *>::iterator Best = Q.begin();
1801  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1802  if (Picker(*Best, *I))
1803  Best = I;
1804  SUnit *V = *Best;
1805  if (Best != std::prev(Q.end()))
1806  std::swap(*Best, Q.back());
1807  Q.pop_back();
1808  return V;
1809 }
1810 
1811 template<class SF>
1812 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1813 #ifndef NDEBUG
1814  if (DAG->StressSched) {
1815  reverse_sort<SF> RPicker(Picker);
1816  return popFromQueueImpl(Q, RPicker);
1817  }
1818 #endif
1819  (void)DAG;
1820  return popFromQueueImpl(Q, Picker);
1821 }
1822 
1823 //===----------------------------------------------------------------------===//
1824 // RegReductionPriorityQueue Definition
1825 //===----------------------------------------------------------------------===//
1826 //
1827 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1828 // to reduce register pressure.
1829 //
1830 template<class SF>
1831 class RegReductionPriorityQueue : public RegReductionPQBase {
1832  SF Picker;
1833 
1834 public:
1835  RegReductionPriorityQueue(MachineFunction &mf,
1836  bool tracksrp,
1837  bool srcorder,
1838  const TargetInstrInfo *tii,
1839  const TargetRegisterInfo *tri,
1840  const TargetLowering *tli)
1841  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1842  tii, tri, tli),
1843  Picker(this) {}
1844 
1845  bool isBottomUp() const override { return SF::IsBottomUp; }
1846 
1847  bool isReady(SUnit *U) const override {
1848  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1849  }
1850 
1851  SUnit *pop() override {
1852  if (Queue.empty()) return nullptr;
1853 
1854  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1855  V->NodeQueueId = 0;
1856  return V;
1857  }
1858 
1859 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1860  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1861  // Emulate pop() without clobbering NodeQueueIds.
1862  std::vector<SUnit *> DumpQueue = Queue;
1863  SF DumpPicker = Picker;
1864  while (!DumpQueue.empty()) {
1865  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1866  dbgs() << "Height " << SU->getHeight() << ": ";
1867  SU->dump(DAG);
1868  }
1869  }
1870 #endif
1871 };
1872 
1873 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1874 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1875 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1876 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1877 
1878 } // end anonymous namespace
1879 
1880 //===----------------------------------------------------------------------===//
1881 // Static Node Priority for Register Pressure Reduction
1882 //===----------------------------------------------------------------------===//
1883 
1884 // Check for special nodes that bypass scheduling heuristics.
1885 // Currently this pushes TokenFactor nodes down, but may be used for other
1886 // pseudo-ops as well.
1887 //
1888 // Return -1 to schedule right above left, 1 for left above right.
1889 // Return 0 if no bias exists.
1890 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1891  bool LSchedLow = left->isScheduleLow;
1892  bool RSchedLow = right->isScheduleLow;
1893  if (LSchedLow != RSchedLow)
1894  return LSchedLow < RSchedLow ? 1 : -1;
1895  return 0;
1896 }
1897 
1898 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1899 /// Smaller number is the higher priority.
1900 static unsigned
1901 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1902  if (SUNumbers[SU->NodeNum] != 0)
1903  return SUNumbers[SU->NodeNum];
1904 
1905  // Use WorkList to avoid stack overflow on excessively large IRs.
1906  struct WorkState {
1907  WorkState(const SUnit *SU) : SU(SU) {}
1908  const SUnit *SU;
1909  unsigned PredsProcessed = 0;
1910  };
1911 
1912  SmallVector<WorkState, 16> WorkList;
1913  WorkList.push_back(SU);
1914  while (!WorkList.empty()) {
1915  auto &Temp = WorkList.back();
1916  auto *TempSU = Temp.SU;
1917  bool AllPredsKnown = true;
1918  // Try to find a non-evaluated pred and push it into the processing stack.
1919  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1920  auto &Pred = TempSU->Preds[P];
1921  if (Pred.isCtrl()) continue; // ignore chain preds
1922  SUnit *PredSU = Pred.getSUnit();
1923  if (SUNumbers[PredSU->NodeNum] == 0) {
1924 #ifndef NDEBUG
1925  // In debug mode, check that we don't have such element in the stack.
1926  for (auto It : WorkList)
1927  assert(It.SU != PredSU && "Trying to push an element twice?");
1928 #endif
1929  // Next time start processing this one starting from the next pred.
1930  Temp.PredsProcessed = P + 1;
1931  WorkList.push_back(PredSU);
1932  AllPredsKnown = false;
1933  break;
1934  }
1935  }
1936 
1937  if (!AllPredsKnown)
1938  continue;
1939 
1940  // Once all preds are known, we can calculate the answer for this one.
1941  unsigned SethiUllmanNumber = 0;
1942  unsigned Extra = 0;
1943  for (const SDep &Pred : TempSU->Preds) {
1944  if (Pred.isCtrl()) continue; // ignore chain preds
1945  SUnit *PredSU = Pred.getSUnit();
1946  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1947  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1948  if (PredSethiUllman > SethiUllmanNumber) {
1949  SethiUllmanNumber = PredSethiUllman;
1950  Extra = 0;
1951  } else if (PredSethiUllman == SethiUllmanNumber)
1952  ++Extra;
1953  }
1954 
1955  SethiUllmanNumber += Extra;
1956  if (SethiUllmanNumber == 0)
1957  SethiUllmanNumber = 1;
1958  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1959  WorkList.pop_back();
1960  }
1961 
1962  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1963  return SUNumbers[SU->NodeNum];
1964 }
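// Editorial sketch (not part of the original file), tracing the loop above on
// a hypothetical DAG where leaf nodes B and C feed A, and A plus leaf E feed D:
//   B, C, E : no non-chain preds -> number 0, clamped to 1
//   A       : max over {B:1, C:1} = 1, one tying pred -> Extra = 1 -> 1 + 1 = 2
//   D       : max over {A:2, E:1} = 2, no tie at the max -> stays 2
// Extra only accumulates for predecessors that equal the running maximum, so
// balanced subtrees raise the number while lopsided ones do not.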
1965 
1966 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1967 /// scheduling units.
1968 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1969  SethiUllmanNumbers.assign(SUnits->size(), 0);
1970 
1971  for (const SUnit &SU : *SUnits)
1972  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1973 }
1974 
1975 void RegReductionPQBase::addNode(const SUnit *SU) {
1976  unsigned SUSize = SethiUllmanNumbers.size();
1977  if (SUnits->size() > SUSize)
1978  SethiUllmanNumbers.resize(SUSize*2, 0);
1979  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1980 }
1981 
1982 void RegReductionPQBase::updateNode(const SUnit *SU) {
1983  SethiUllmanNumbers[SU->NodeNum] = 0;
1984  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1985 }
1986 
1987 // Lower priority means schedule further down. For bottom-up scheduling, lower
1988 // priority SUs are scheduled before higher priority SUs.
1989 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1990  assert(SU->NodeNum < SethiUllmanNumbers.size());
1991  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1992  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1993  // CopyToReg should be close to its uses to facilitate coalescing and
1994  // avoid spilling.
1995  return 0;
1996  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1997  Opc == TargetOpcode::SUBREG_TO_REG ||
1998  Opc == TargetOpcode::INSERT_SUBREG)
1999  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2000  // close to their uses to facilitate coalescing.
2001  return 0;
2002  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2003  // If SU does not have a register use, i.e. it doesn't produce a value
2004  // that would be consumed (e.g. store), then it terminates a chain of
2005  // computation. Give it a large SethiUllman number so it will be
2006  // scheduled right before its predecessors so that it doesn't lengthen
2007  // their live ranges.
2008  return 0xffff;
2009  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2010  // If SU does not have a register def, schedule it close to its uses
2011  // because it does not lengthen any live ranges.
2012  return 0;
2013 #if 1
2014  return SethiUllmanNumbers[SU->NodeNum];
2015 #else
2016  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2017  if (SU->isCallOp) {
2018  // FIXME: This assumes all of the defs are used as call operands.
2019  int NP = (int)Priority - SU->getNode()->getNumValues();
2020  return (NP > 0) ? NP : 0;
2021  }
2022  return Priority;
2023 #endif
2024 }
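// Editorial sketch (not part of the original file), with hypothetical nodes:
// a store (NumSuccs == 0, NumPreds != 0) returns 0xffff so it stays next to
// the computation it terminates, a CopyToReg or EXTRACT_SUBREG returns 0 so it
// stays next to its use for coalescing, and an ordinary ALU node falls through
// to the Sethi-Ullman number computed above.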
2025 
2026 //===----------------------------------------------------------------------===//
2027 // Register Pressure Tracking
2028 //===----------------------------------------------------------------------===//
2029 
2030 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2031 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2032  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2033  unsigned Id = RC->getID();
2034  unsigned RP = RegPressure[Id];
2035  if (!RP) continue;
2036  DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2037  << RegLimit[Id] << '\n');
2038  }
2039 }
2040 #endif
2041 
2042 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2043  if (!TLI)
2044  return false;
2045 
2046  for (const SDep &Pred : SU->Preds) {
2047  if (Pred.isCtrl())
2048  continue;
2049  SUnit *PredSU = Pred.getSUnit();
2050  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2051  // to cover the number of registers defined (they are all live).
2052  if (PredSU->NumRegDefsLeft == 0) {
2053  continue;
2054  }
2055  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2056  RegDefPos.IsValid(); RegDefPos.Advance()) {
2057  unsigned RCId, Cost;
2058  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2059 
2060  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2061  return true;
2062  }
2063  }
2064  return false;
2065 }
2066 
2067 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2068  const SDNode *N = SU->getNode();
2069 
2070  if (!N->isMachineOpcode() || !SU->NumSuccs)
2071  return false;
2072 
2073  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2074  for (unsigned i = 0; i != NumDefs; ++i) {
2075  MVT VT = N->getSimpleValueType(i);
2076  if (!N->hasAnyUseOfValue(i))
2077  continue;
2078  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2079  if (RegPressure[RCId] >= RegLimit[RCId])
2080  return true;
2081  }
2082  return false;
2083 }
2084 
2085 // Compute the register pressure contribution of this instruction by counting up
2086 // for uses that are not live and down for defs. Only count register classes
2087 // that are already under high pressure. As a side effect, compute the number of
2088 // uses of registers that are already live.
2089 //
2090 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2091 // so could probably be factored.
2092 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2093  LiveUses = 0;
2094  int PDiff = 0;
2095  for (const SDep &Pred : SU->Preds) {
2096  if (Pred.isCtrl())
2097  continue;
2098  SUnit *PredSU = Pred.getSUnit();
2099  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2100  // to cover the number of registers defined (they are all live).
2101  if (PredSU->NumRegDefsLeft == 0) {
2102  if (PredSU->getNode()->isMachineOpcode())
2103  ++LiveUses;
2104  continue;
2105  }
2106  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2107  RegDefPos.IsValid(); RegDefPos.Advance()) {
2108  MVT VT = RegDefPos.GetValue();
2109  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2110  if (RegPressure[RCId] >= RegLimit[RCId])
2111  ++PDiff;
2112  }
2113  }
2114  const SDNode *N = SU->getNode();
2115 
2116  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2117  return PDiff;
2118 
2119  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2120  for (unsigned i = 0; i != NumDefs; ++i) {
2121  MVT VT = N->getSimpleValueType(i);
2122  if (!N->hasAnyUseOfValue(i))
2123  continue;
2124  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2125  if (RegPressure[RCId] >= RegLimit[RCId])
2126  --PDiff;
2127  }
2128  return PDiff;
2129 }
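// Editorial sketch (not part of the original file, hypothetical numbers):
// suppose SU has two non-chain predecessors, each with one still-unscheduled
// def in a register class already at its limit, and SU itself produces one
// used value in that same class. The first loop yields PDiff = +2 (two uses
// that would become live), the second loop subtracts 1 for SU's own def,
// leaving PDiff = +1, i.e. roughly one extra live register in the
// over-subscribed class if SU is scheduled now.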
2130 
2131 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2132  if (!TracksRegPressure)
2133  return;
2134 
2135  if (!SU->getNode())
2136  return;
2137 
2138  for (const SDep &Pred : SU->Preds) {
2139  if (Pred.isCtrl())
2140  continue;
2141  SUnit *PredSU = Pred.getSUnit();
2142  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2143  // to cover the number of registers defined (they are all live).
2144  if (PredSU->NumRegDefsLeft == 0) {
2145  continue;
2146  }
2147  // FIXME: The ScheduleDAG currently loses information about which of a
2148  // node's values is consumed by each dependence. Consequently, if the node
2149  // defines multiple register classes, we don't know which to pressurize
2150  // here. Instead the following loop consumes the register defs in an
2151  // arbitrary order. At least it handles the common case of clustered loads
2152  // to the same class. For precise liveness, each SDep needs to indicate the
2153  // result number. But that tightly couples the ScheduleDAG with the
2154  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2155  // value type or register class to SDep.
2156  //
2157  // The most important aspect of register tracking is balancing the increase
2158  // here with the reduction further below. Note that this SU may use multiple
2159  // defs in PredSU. They can't be determined here, but we've already
2160  // compensated by reducing NumRegDefsLeft in PredSU during
2161  // ScheduleDAGSDNodes::AddSchedEdges.
2162  --PredSU->NumRegDefsLeft;
2163  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2164  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2165  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2166  if (SkipRegDefs)
2167  continue;
2168 
2169  unsigned RCId, Cost;
2170  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2171  RegPressure[RCId] += Cost;
2172  break;
2173  }
2174  }
2175 
2176  // We should have this assert, but there may be dead SDNodes that never
2177  // materialize as SUnits, so they don't appear to generate liveness.
2178  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2179  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2180  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2181  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2182  if (SkipRegDefs > 0)
2183  continue;
2184  unsigned RCId, Cost;
2185  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2186  if (RegPressure[RCId] < Cost) {
2187  // Register pressure tracking is imprecise. This can happen. But we try
2188  // hard not to let it happen because it likely results in poor scheduling.
2189  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
2190  RegPressure[RCId] = 0;
2191  }
2192  else {
2193  RegPressure[RCId] -= Cost;
2194  }
2195  }
2196  DEBUG(dumpRegPressure());
2197 }
2198 
2199 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2200  if (!TracksRegPressure)
2201  return;
2202 
2203  const SDNode *N = SU->getNode();
2204  if (!N) return;
2205 
2206  if (!N->isMachineOpcode()) {
2207  if (N->getOpcode() != ISD::CopyToReg)
2208  return;
2209  } else {
2210  unsigned Opc = N->getMachineOpcode();
2211  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2212  Opc == TargetOpcode::INSERT_SUBREG ||
2213  Opc == TargetOpcode::SUBREG_TO_REG ||
2214  Opc == TargetOpcode::REG_SEQUENCE ||
2215  Opc == TargetOpcode::IMPLICIT_DEF)
2216  return;
2217  }
2218 
2219  for (const SDep &Pred : SU->Preds) {
2220  if (Pred.isCtrl())
2221  continue;
2222  SUnit *PredSU = Pred.getSUnit();
2223  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2224  // counts data deps.
2225  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2226  continue;
2227  const SDNode *PN = PredSU->getNode();
2228  if (!PN->isMachineOpcode()) {
2229  if (PN->getOpcode() == ISD::CopyFromReg) {
2230  MVT VT = PN->getSimpleValueType(0);
2231  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2232  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2233  }
2234  continue;
2235  }
2236  unsigned POpc = PN->getMachineOpcode();
2237  if (POpc == TargetOpcode::IMPLICIT_DEF)
2238  continue;
2239  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2240  POpc == TargetOpcode::INSERT_SUBREG ||
2241  POpc == TargetOpcode::SUBREG_TO_REG) {
2242  MVT VT = PN->getSimpleValueType(0);
2243  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2244  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2245  continue;
2246  }
2247  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2248  for (unsigned i = 0; i != NumDefs; ++i) {
2249  MVT VT = PN->getSimpleValueType(i);
2250  if (!PN->hasAnyUseOfValue(i))
2251  continue;
2252  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2253  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2254  // Register pressure tracking is imprecise. This can happen.
2255  RegPressure[RCId] = 0;
2256  else
2257  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2258  }
2259  }
2260 
2261  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2262  // may transfer data dependencies to CopyToReg.
2263  if (SU->NumSuccs && N->isMachineOpcode()) {
2264  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2265  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2266  MVT VT = N->getSimpleValueType(i);
2267  if (VT == MVT::Glue || VT == MVT::Other)
2268  continue;
2269  if (!N->hasAnyUseOfValue(i))
2270  continue;
2271  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2272  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2273  }
2274  }
2275 
2276  DEBUG(dumpRegPressure());
2277 }
2278 
2279 //===----------------------------------------------------------------------===//
2280 // Dynamic Node Priority for Register Pressure Reduction
2281 //===----------------------------------------------------------------------===//
2282 
2283 /// closestSucc - Returns the scheduled cycle of the successor which is
2284 /// closest to the current cycle.
2285 static unsigned closestSucc(const SUnit *SU) {
2286  unsigned MaxHeight = 0;
2287  for (const SDep &Succ : SU->Succs) {
2288  if (Succ.isCtrl()) continue; // ignore chain succs
2289  unsigned Height = Succ.getSUnit()->getHeight();
2290  // If there are a bunch of CopyToRegs stacked up, they should be considered
2291  // to be at the same position.
2292  if (Succ.getSUnit()->getNode() &&
2293  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2294  Height = closestSucc(Succ.getSUnit())+1;
2295  if (Height > MaxHeight)
2296  MaxHeight = Height;
2297  }
2298  return MaxHeight;
2299 }
2300 
2301 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2302 /// for scratch registers, i.e. number of data dependencies.
2303 static unsigned calcMaxScratches(const SUnit *SU) {
2304  unsigned Scratches = 0;
2305  for (const SDep &Pred : SU->Preds) {
2306  if (Pred.isCtrl()) continue; // ignore chain preds
2307  Scratches++;
2308  }
2309  return Scratches;
2310 }
2311 
2312 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2313 /// CopyFromReg from a virtual register.
2314 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2315  bool RetVal = false;
2316  for (const SDep &Pred : SU->Preds) {
2317  if (Pred.isCtrl()) continue;
2318  const SUnit *PredSU = Pred.getSUnit();
2319  if (PredSU->getNode() &&
2320  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2321  unsigned Reg =
2322  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2323  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2324  RetVal = true;
2325  continue;
2326  }
2327  }
2328  return false;
2329  }
2330  return RetVal;
2331 }
2332 
2333 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2334 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2335 /// it has no other use. It should be scheduled closer to the terminator.
2336 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2337  bool RetVal = false;
2338  for (const SDep &Succ : SU->Succs) {
2339  if (Succ.isCtrl()) continue;
2340  const SUnit *SuccSU = Succ.getSUnit();
2341  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2342  unsigned Reg =
2343  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2344  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2345  RetVal = true;
2346  continue;
2347  }
2348  }
2349  return false;
2350  }
2351  return RetVal;
2352 }
2353 
2354 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2355 // set isVRegCycle for its CopyFromReg operands.
2356 //
2357 // This is only relevant for single-block loops, in which case the VRegCycle
2358 // node is likely an induction variable in which the operand and target virtual
2359 // registers should be coalesced (e.g. pre/post increment values). Setting the
2360 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2361 // CopyFromReg so that this node becomes the virtual register "kill". This
2362 // avoids interference between the values live in and out of the block and
2363 // eliminates a copy inside the loop.
2364 static void initVRegCycle(SUnit *SU) {
2365  if (DisableSchedVRegCycle)
2366  return;
2367 
2368  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2369  return;
2370 
2371  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2372 
2373  SU->isVRegCycle = true;
2374 
2375  for (const SDep &Pred : SU->Preds) {
2376  if (Pred.isCtrl()) continue;
2377  Pred.getSUnit()->isVRegCycle = true;
2378  }
2379 }
2380 
2381 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2382 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2383 static void resetVRegCycle(SUnit *SU) {
2384  if (!SU->isVRegCycle)
2385  return;
2386 
2387  for (const SDep &Pred : SU->Preds) {
2388  if (Pred.isCtrl()) continue; // ignore chain preds
2389  SUnit *PredSU = Pred.getSUnit();
2390  if (PredSU->isVRegCycle) {
2391  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2392  "VRegCycle def must be CopyFromReg");
2393  Pred.getSUnit()->isVRegCycle = false;
2394  }
2395  }
2396 }
2397 
2398 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2399 // means a node that defines the VRegCycle has not been scheduled yet.
2400 static bool hasVRegCycleUse(const SUnit *SU) {
2401  // If this SU also defines the VReg, don't hoist it as a "use".
2402  if (SU->isVRegCycle)
2403  return false;
2404 
2405  for (const SDep &Pred : SU->Preds) {
2406  if (Pred.isCtrl()) continue; // ignore chain preds
2407  if (Pred.getSUnit()->isVRegCycle &&
2408  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2409  DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2410  return true;
2411  }
2412  }
2413  return false;
2414 }
2415 
2416 // Check for either a dependence (latency) or resource (hazard) stall.
2417 //
2418 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2419 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2420  if ((int)SPQ->getCurCycle() < Height) return true;
2421  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2422  != ScheduleHazardRecognizer::NoHazard)
2423  return true;
2424  return false;
2425 }
2426 
2427 // Return -1 if left has higher priority, 1 if right has higher priority.
2428 // Return 0 if latency-based priority is equivalent.
2429 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2430  RegReductionPQBase *SPQ) {
2431  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2432  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2433  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2434  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2435  int LHeight = (int)left->getHeight() + LPenalty;
2436  int RHeight = (int)right->getHeight() + RPenalty;
2437 
2438  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2439  BUHasStall(left, LHeight, SPQ);
2440  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2441  BUHasStall(right, RHeight, SPQ);
2442 
2443  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2444  // If scheduling both of the nodes will cause a pipeline stall, sort them
2445  // according to their height.
2446  if (LStall) {
2447  if (!RStall)
2448  return 1;
2449  if (LHeight != RHeight)
2450  return LHeight > RHeight ? 1 : -1;
2451  } else if (RStall)
2452  return -1;
2453 
2454  // If either node is scheduling for latency, sort them by height/depth
2455  // and latency.
2456  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2457  right->SchedulingPref == Sched::ILP)) {
2458  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2459  // is enabled, grouping instructions by cycle, then its height is already
2460  // covered so only its depth matters. We also reach this point if both stall
2461  // but have the same height.
2462  if (!SPQ->getHazardRec()->isEnabled()) {
2463  if (LHeight != RHeight)
2464  return LHeight > RHeight ? 1 : -1;
2465  }
2466  int LDepth = left->getDepth() - LPenalty;
2467  int RDepth = right->getDepth() - RPenalty;
2468  if (LDepth != RDepth) {
2469  DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2470  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2471  << ") depth " << RDepth << "\n");
2472  return LDepth < RDepth ? 1 : -1;
2473  }
2474  if (left->Latency != right->Latency)
2475  return left->Latency > right->Latency ? 1 : -1;
2476  }
2477  return 0;
2478 }
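// Editorial sketch (not part of the original file): with the current cycle at
// 3, a hypothetical left node of height 5 stalls in BUHasStall while a right
// node of height 2 does not, so the function returns 1 and the right node is
// preferred; if both stalled, the taller of the two would be deferred by the
// height comparison, with depth and latency breaking any remaining tie.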
2479 
2480 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2481  // Schedule physical register definitions close to their use. This is
2482  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2483  // long as shortening physreg live ranges is generally good, we can defer
2484  // creating a subtarget hook.
2485  if (!DisableSchedPhysRegJoin) {
2486  bool LHasPhysReg = left->hasPhysRegDefs;
2487  bool RHasPhysReg = right->hasPhysRegDefs;
2488  if (LHasPhysReg != RHasPhysReg) {
2489  #ifndef NDEBUG
2490  static const char *const PhysRegMsg[] = { " has no physreg",
2491  " defines a physreg" };
2492  #endif
2493  DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2494  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2495  << PhysRegMsg[RHasPhysReg] << "\n");
2496  return LHasPhysReg < RHasPhysReg;
2497  }
2498  }
2499 
2500  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2501  unsigned LPriority = SPQ->getNodePriority(left);
2502  unsigned RPriority = SPQ->getNodePriority(right);
2503 
2504  // Be really careful about hoisting call operands above previous calls.
2505  // Only allow it if it would reduce register pressure.
2506  if (left->isCall && right->isCallOp) {
2507  unsigned RNumVals = right->getNode()->getNumValues();
2508  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2509  }
2510  if (right->isCall && left->isCallOp) {
2511  unsigned LNumVals = left->getNode()->getNumValues();
2512  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2513  }
2514 
2515  if (LPriority != RPriority)
2516  return LPriority > RPriority;
2517 
2518  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2519  // the same, then keep source order.
2520  if (left->isCall || right->isCall) {
2521  unsigned LOrder = SPQ->getNodeOrdering(left);
2522  unsigned ROrder = SPQ->getNodeOrdering(right);
2523 
2524  // Prefer an ordering where the lower the non-zero order number, the higher
2525  // the preference.
2526  if ((LOrder || ROrder) && LOrder != ROrder)
2527  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2528  }
2529 
2530  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2531  // e.g.
2532  // t1 = op t2, c1
2533  // t3 = op t4, c2
2534  //
2535  // and the following instructions are both ready.
2536  // t2 = op c3
2537  // t4 = op c4
2538  //
2539  // Then schedule t2 = op first.
2540  // i.e.
2541  // t4 = op c4
2542  // t2 = op c3
2543  // t1 = op t2, c1
2544  // t3 = op t4, c2
2545  //
2546  // This creates more short live intervals.
2547  unsigned LDist = closestSucc(left);
2548  unsigned RDist = closestSucc(right);
2549  if (LDist != RDist)
2550  return LDist < RDist;
2551 
2552  // How many registers become live when the node is scheduled.
2553  unsigned LScratch = calcMaxScratches(left);
2554  unsigned RScratch = calcMaxScratches(right);
2555  if (LScratch != RScratch)
2556  return LScratch > RScratch;
2557 
2558  // Comparing latency against a call makes little sense unless the node
2559  // is register pressure-neutral.
2560  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2561  return (left->NodeQueueId > right->NodeQueueId);
2562 
2563  // Do not compare latencies when one or both of the nodes are calls.
2564  if (!DisableSchedCycles &&
2565  !(left->isCall || right->isCall)) {
2566  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2567  if (result != 0)
2568  return result > 0;
2569  }
2570  else {
2571  if (left->getHeight() != right->getHeight())
2572  return left->getHeight() > right->getHeight();
2573 
2574  if (left->getDepth() != right->getDepth())
2575  return left->getDepth() < right->getDepth();
2576  }
2577 
2578  assert(left->NodeQueueId && right->NodeQueueId &&
2579  "NodeQueueId cannot be zero");
2580  return (left->NodeQueueId > right->NodeQueueId);
2581 }
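// Editorial sketch (not part of the original file): a true result from this
// comparator means the right node is popped first. For two hypothetical
// non-call nodes that tie on physreg defs, priority, and closestSucc distance,
// but where left has three data predecessors and right has one, LScratch (3)
// > RScratch (1) is true, so the right node is scheduled first and fewer new
// values become live; NodeQueueId only decides when every other test ties.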
2582 
2583 // Bottom up
2584 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2585  if (int res = checkSpecialNodes(left, right))
2586  return res > 0;
2587 
2588  return BURRSort(left, right, SPQ);
2589 }
2590 
2591 // Source order, otherwise bottom up.
2592 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2593  if (int res = checkSpecialNodes(left, right))
2594  return res > 0;
2595 
2596  unsigned LOrder = SPQ->getNodeOrdering(left);
2597  unsigned ROrder = SPQ->getNodeOrdering(right);
2598 
2599  // Prefer an ordering where the lower the non-zero order number, the higher
2600  // the preference.
2601  if ((LOrder || ROrder) && LOrder != ROrder)
2602  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2603 
2604  return BURRSort(left, right, SPQ);
2605 }
2606 
2607 // If the time between now and when the instruction will be ready can cover
2608 // the spill code, then avoid adding it to the ready queue. This gives long
2609 // stalls highest priority and allows hoisting across calls. It should also
2610 // speed up processing the available queue.
2611 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2612  static const unsigned ReadyDelay = 3;
2613 
2614  if (SPQ->MayReduceRegPressure(SU)) return true;
2615 
2616  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2617 
2618  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2619  != ScheduleHazardRecognizer::NoHazard)
2620  return false;
2621 
2622  return true;
2623 }
2624 
2625 // Return true if right should be scheduled with higher priority than left.
2626 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2627  if (int res = checkSpecialNodes(left, right))
2628  return res > 0;
2629 
2630  if (left->isCall || right->isCall)
2631  // No way to compute latency of calls.
2632  return BURRSort(left, right, SPQ);
2633 
2634  bool LHigh = SPQ->HighRegPressure(left);
2635  bool RHigh = SPQ->HighRegPressure(right);
2636  // Avoid causing spills. If register pressure is high, schedule for
2637  // register pressure reduction.
2638  if (LHigh && !RHigh) {
2639  DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2640  << right->NodeNum << ")\n");
2641  return true;
2642  }
2643  else if (!LHigh && RHigh) {
2644  DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2645  << left->NodeNum << ")\n");
2646  return false;
2647  }
2648  if (!LHigh && !RHigh) {
2649  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2650  if (result != 0)
2651  return result > 0;
2652  }
2653  return BURRSort(left, right, SPQ);
2654 }
2655 
2656 // Schedule as many instructions in each cycle as possible. So don't make an
2657 // instruction available unless it is ready in the current cycle.
2658 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2659  if (SU->getHeight() > CurCycle) return false;
2660 
2661  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2662  != ScheduleHazardRecognizer::NoHazard)
2663  return false;
2664 
2665  return true;
2666 }
2667 
2668 static bool canEnableCoalescing(SUnit *SU) {
2669  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2670  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2671  // CopyToReg should be close to its uses to facilitate coalescing and
2672  // avoid spilling.
2673  return true;
2674 
2675  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2676  Opc == TargetOpcode::SUBREG_TO_REG ||
2677  Opc == TargetOpcode::INSERT_SUBREG)
2678  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2679  // close to their uses to facilitate coalescing.
2680  return true;
2681 
2682  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2683  // If SU does not have a register def, schedule it close to its uses
2684  // because it does not lengthen any live ranges.
2685  return true;
2686 
2687  return false;
2688 }
2689 
2690 // list-ilp is currently an experimental scheduler that allows various
2691 // heuristics to be enabled prior to the normal register reduction logic.
2692 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2693  if (int res = checkSpecialNodes(left, right))
2694  return res > 0;
2695 
2696  if (left->isCall || right->isCall)
2697  // No way to compute latency of calls.
2698  return BURRSort(left, right, SPQ);
2699 
2700  unsigned LLiveUses = 0, RLiveUses = 0;
2701  int LPDiff = 0, RPDiff = 0;
2702  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2703  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2704  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2705  }
2706  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2707  DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2708  << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2709  return LPDiff > RPDiff;
2710  }
2711 
2712  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2713  bool LReduce = canEnableCoalescing(left);
2714  bool RReduce = canEnableCoalescing(right);
2715  if (LReduce && !RReduce) return false;
2716  if (RReduce && !LReduce) return true;
2717  }
2718 
2719  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2720  DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2721  << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2722  return LLiveUses < RLiveUses;
2723  }
2724 
2725  if (!DisableSchedStalls) {
2726  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2727  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2728  if (LStall != RStall)
2729  return left->getHeight() > right->getHeight();
2730  }
2731 
2732  if (!DisableSchedCriticalPath) {
2733  int spread = (int)left->getDepth() - (int)right->getDepth();
2734  if (std::abs(spread) > MaxReorderWindow) {
2735  DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2736  << left->getDepth() << " != SU(" << right->NodeNum << "): "
2737  << right->getDepth() << "\n");
2738  return left->getDepth() < right->getDepth();
2739  }
2740  }
2741 
2742  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2743  int spread = (int)left->getHeight() - (int)right->getHeight();
2744  if (std::abs(spread) > MaxReorderWindow)
2745  return left->getHeight() > right->getHeight();
2746  }
2747 
2748  return BURRSort(left, right, SPQ);
2749 }
2750 
2751 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2752  SUnits = &sunits;
2753  // Add pseudo dependency edges for two-address nodes.
2754  if (!Disable2AddrHack)
2755  AddPseudoTwoAddrDeps();
2756  // Reroute edges to nodes with multiple uses.
2757  if (!TracksRegPressure && !SrcOrder)
2758  PrescheduleNodesWithMultipleUses();
2759  // Calculate node priorities.
2760  CalculateSethiUllmanNumbers();
2761 
2762  // For single block loops, mark nodes that look like canonical IV increments.
2763  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2764  for (SUnit &SU : sunits)
2765  initVRegCycle(&SU);
2766 }
2767 
2768 //===----------------------------------------------------------------------===//
2769 // Preschedule for Register Pressure
2770 //===----------------------------------------------------------------------===//
2771 
2772 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2773  if (SU->isTwoAddress) {
2774  unsigned Opc = SU->getNode()->getMachineOpcode();
2775  const MCInstrDesc &MCID = TII->get(Opc);
2776  unsigned NumRes = MCID.getNumDefs();
2777  unsigned NumOps = MCID.getNumOperands() - NumRes;
2778  for (unsigned i = 0; i != NumOps; ++i) {
2779  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2780  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2781  if (DU->getNodeId() != -1 &&
2782  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2783  return true;
2784  }
2785  }
2786  }
2787  return false;
2788 }
2789 
2790 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2791 /// successor's explicit physregs whose definition can reach DepSU.
2792 /// i.e. DepSU should not be scheduled above SU.
2793 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2794  ScheduleDAGRRList *scheduleDAG,
2795  const TargetInstrInfo *TII,
2796  const TargetRegisterInfo *TRI) {
2797  const MCPhysReg *ImpDefs
2798  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2799  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2800  if(!ImpDefs && !RegMask)
2801  return false;
2802 
2803  for (const SDep &Succ : SU->Succs) {
2804  SUnit *SuccSU = Succ.getSUnit();
2805  for (const SDep &SuccPred : SuccSU->Preds) {
2806  if (!SuccPred.isAssignedRegDep())
2807  continue;
2808 
2809  if (RegMask &&
2810  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2811  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2812  return true;
2813 
2814  if (ImpDefs)
2815  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2816  // Return true if SU clobbers this physical register use and the
2817  // definition of the register reaches from DepSU. IsReachable queries
2818  // a topological forward sort of the DAG (following the successors).
2819  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2820  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2821  return true;
2822  }
2823  }
2824  return false;
2825 }
2826 
2827 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2828 /// physical register defs.
2829 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2830  const TargetInstrInfo *TII,
2831  const TargetRegisterInfo *TRI) {
2832  SDNode *N = SuccSU->getNode();
2833  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2834  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2835  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2836  for (const SDNode *SUNode = SU->getNode(); SUNode;
2837  SUNode = SUNode->getGluedNode()) {
2838  if (!SUNode->isMachineOpcode())
2839  continue;
2840  const MCPhysReg *SUImpDefs =
2841  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2842  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2843  if (!SUImpDefs && !SURegMask)
2844  continue;
2845  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2846  MVT VT = N->getSimpleValueType(i);
2847  if (VT == MVT::Glue || VT == MVT::Other)
2848  continue;
2849  if (!N->hasAnyUseOfValue(i))
2850  continue;
2851  unsigned Reg = ImpDefs[i - NumDefs];
2852  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2853  return true;
2854  if (!SUImpDefs)
2855  continue;
2856  for (;*SUImpDefs; ++SUImpDefs) {
2857  unsigned SUReg = *SUImpDefs;
2858  if (TRI->regsOverlap(Reg, SUReg))
2859  return true;
2860  }
2861  }
2862  }
2863  return false;
2864 }
2865 
2866 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2867 /// are not handled well by the general register pressure reduction
2868 /// heuristics. When presented with code like this:
2869 ///
2870 ///      N
2871 ///    / |
2872 ///   /  |
2873 ///  U  store
2874 ///  |
2875 /// ...
2876 ///
2877 /// the heuristics tend to push the store up, but since the
2878 /// operand of the store has another use (U), this would increase
2879 /// the length of that other use (the U->N edge).
2880 ///
2881 /// This function transforms code like the above to route U's
2882 /// dependence through the store when possible, like this:
2883 ///
2884 ///      N
2885 ///      ||
2886 ///      ||
2887 ///     store
2888 ///       |
2889 ///       U
2890 ///       |
2891 ///      ...
2892 ///
2893 /// This results in the store being scheduled immediately
2894 /// after N, which shortens the U->N live range, reducing
2895 /// register pressure.
2896 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2897  // Visit all the nodes in topological order, working top-down.
2898  for (SUnit &SU : *SUnits) {
2899  // For now, only look at nodes with no data successors, such as stores.
2900  // These are especially important, due to the heuristics in
2901  // getNodePriority for nodes with no data successors.
2902  if (SU.NumSuccs != 0)
2903  continue;
2904  // For now, only look at nodes with exactly one data predecessor.
2905  if (SU.NumPreds != 1)
2906  continue;
2907  // Avoid prescheduling copies to virtual registers, which don't behave
2908  // like other nodes from the perspective of scheduling heuristics.
2909  if (SDNode *N = SU.getNode())
2910  if (N->getOpcode() == ISD::CopyToReg &&
2911  TargetRegisterInfo::isVirtualRegister
2912  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2913  continue;
2914 
2915  // Locate the single data predecessor.
2916  SUnit *PredSU = nullptr;
2917  for (const SDep &Pred : SU.Preds)
2918  if (!Pred.isCtrl()) {
2919  PredSU = Pred.getSUnit();
2920  break;
2921  }
2922  assert(PredSU);
2923 
2924  // Don't rewrite edges that carry physregs, because that requires additional
2925  // support infrastructure.
2926  if (PredSU->hasPhysRegDefs)
2927  continue;
2928  // Short-circuit the case where SU is PredSU's only data successor.
2929  if (PredSU->NumSuccs == 1)
2930  continue;
2931  // Avoid prescheduling to copies from virtual registers, which don't behave
2932  // like other nodes from the perspective of scheduling heuristics.
2933  if (SDNode *N = SU.getNode())
2934  if (N->getOpcode() == ISD::CopyFromReg &&
2935  TargetRegisterInfo::isVirtualRegister
2936  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2937  continue;
2938 
2939  // Perform checks on the successors of PredSU.
2940  for (const SDep &PredSucc : PredSU->Succs) {
2941  SUnit *PredSuccSU = PredSucc.getSUnit();
2942  if (PredSuccSU == &SU) continue;
2943  // If PredSU has another successor with no data successors, for
2944  // now don't attempt to choose either over the other.
2945  if (PredSuccSU->NumSuccs == 0)
2946  goto outer_loop_continue;
2947  // Don't break physical register dependencies.
2948  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2949  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2950  goto outer_loop_continue;
2951  // Don't introduce graph cycles.
2952  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2953  goto outer_loop_continue;
2954  }
2955 
2956  // Ok, the transformation is safe and the heuristics suggest it is
2957  // profitable. Update the graph.
2958  DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
2959  << " next to PredSU #" << PredSU->NodeNum
2960  << " to guide scheduling in the presence of multiple uses\n");
2961  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2962  SDep Edge = PredSU->Succs[i];
2963  assert(!Edge.isAssignedRegDep());
2964  SUnit *SuccSU = Edge.getSUnit();
2965  if (SuccSU != &SU) {
2966  Edge.setSUnit(PredSU);
2967  scheduleDAG->RemovePred(SuccSU, Edge);
2968  scheduleDAG->AddPred(&SU, Edge);
2969  Edge.setSUnit(&SU);
2970  scheduleDAG->AddPred(SuccSU, Edge);
2971  --i;
2972  }
2973  }
2974  outer_loop_continue:;
2975  }
2976 }
2977 
2978 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2979 /// it as a def&use operand, add a pseudo control edge from it to the other
2980 /// node (if it won't create a cycle) so the two-address one will be scheduled
2981 /// first (lower in the schedule). If both nodes are two-address, favor the
2982 /// one that has a CopyToReg use (more likely to be a loop induction update).
2983 /// If both are two-address, but one is commutable while the other is not
2984 /// commutable, favor the one that's not commutable.
2985 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2986  for (SUnit &SU : *SUnits) {
2987  if (!SU.isTwoAddress)
2988  continue;
2989 
2990  SDNode *Node = SU.getNode();
2991  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
2992  continue;
2993 
2994  bool isLiveOut = hasOnlyLiveOutUses(&SU);
2995  unsigned Opc = Node->getMachineOpcode();
2996  const MCInstrDesc &MCID = TII->get(Opc);
2997  unsigned NumRes = MCID.getNumDefs();
2998  unsigned NumOps = MCID.getNumOperands() - NumRes;
2999  for (unsigned j = 0; j != NumOps; ++j) {
3000  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3001  continue;
3002  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3003  if (DU->getNodeId() == -1)
3004  continue;
3005  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3006  if (!DUSU)
3007  continue;
3008  for (const SDep &Succ : DUSU->Succs) {
3009  if (Succ.isCtrl())
3010  continue;
3011  SUnit *SuccSU = Succ.getSUnit();
3012  if (SuccSU == &SU)
3013  continue;
3014  // Be conservative. Ignore if nodes aren't at roughly the same
3015  // depth and height.
3016  if (SuccSU->getHeight() < SU.getHeight() &&
3017  (SU.getHeight() - SuccSU->getHeight()) > 1)
3018  continue;
3019  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3020  // constrains whatever is using the copy, instead of the copy
3021  // itself. In the case that the copy is coalesced, this
3022  // preserves the intent of the pseudo two-address heuristics.
3023  while (SuccSU->Succs.size() == 1 &&
3024  SuccSU->getNode()->isMachineOpcode() &&
3025  SuccSU->getNode()->getMachineOpcode() ==
3026  TargetOpcode::COPY_TO_REGCLASS)
3027  SuccSU = SuccSU->Succs.front().getSUnit();
3028  // Don't constrain non-instruction nodes.
3029  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3030  continue;
3031  // Don't constrain nodes with physical register defs if the
3032  // predecessor can clobber them.
3033  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3034  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3035  continue;
3036  }
3037  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3038  // these may be coalesced away. We want them close to their uses.
3039  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3040  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3041  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3042  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3043  continue;
3044  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3045  (!canClobber(SuccSU, DUSU) ||
3046  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3047  (!SU.isCommutable && SuccSU->isCommutable)) &&
3048  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3049  DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
3050  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3051  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3052  }
3053  }
3054  }
3055  }
3056 }
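// Editorial sketch (not part of the original file, hypothetical values): for a
// two-address node "a = ADD t, 1" whose tied operand t is also read by an
// unrelated "b = MUL t, c", the loop above adds an artificial edge making the
// MUL a predecessor of the ADD, so the MUL ends up above the ADD in the final
// order and the ADD can reuse t's register without clobbering the other
// reader.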
3057 
3058 //===----------------------------------------------------------------------===//
3059 // Public Constructor Functions
3060 //===----------------------------------------------------------------------===//
3061 
3062 llvm::ScheduleDAGSDNodes *
3063 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3064  CodeGenOpt::Level OptLevel) {
3065  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3066  const TargetInstrInfo *TII = STI.getInstrInfo();
3067  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3068 
3069  BURegReductionPriorityQueue *PQ =
3070  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3071  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3072  PQ->setScheduleDAG(SD);
3073  return SD;
3074 }
3075 
3076 llvm::ScheduleDAGSDNodes *
3077 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3078  CodeGenOpt::Level OptLevel) {
3079  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3080  const TargetInstrInfo *TII = STI.getInstrInfo();
3081  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3082 
3083  SrcRegReductionPriorityQueue *PQ =
3084  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3085  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3086  PQ->setScheduleDAG(SD);
3087  return SD;
3088 }
3089 
3090 llvm::ScheduleDAGSDNodes *
3091 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3092  CodeGenOpt::Level OptLevel) {
3093  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3094  const TargetInstrInfo *TII = STI.getInstrInfo();
3095  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3096  const TargetLowering *TLI = IS->TLI;
3097 
3098  HybridBURRPriorityQueue *PQ =
3099  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3100 
3101  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3102  PQ->setScheduleDAG(SD);
3103  return SD;
3104 }
3105 
3106 llvm::ScheduleDAGSDNodes *
3107 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3108  CodeGenOpt::Level OptLevel) {
3109  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3110  const TargetInstrInfo *TII = STI.getInstrInfo();
3111  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3112  const TargetLowering *TLI = IS->TLI;
3113 
3114  ILPBURRPriorityQueue *PQ =
3115  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3116  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3117  PQ->setScheduleDAG(SD);
3118  return SD;
3119 }
Definition: ScheduleDAG.h:212
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static bool hasVRegCycleUse(const SUnit *SU)
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:307
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:547
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
MachineFunction * MF
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:289
This interface is used to plug different priorities computation algorithms into the list scheduler...
Definition: ScheduleDAG.h:506
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:554
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:274
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
virtual void releaseState()=0
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
const TargetLowering * TLI
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the &#39;representative&#39; register class for the specified value type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
Reg
All possible values of the reg field in the ModR/M byte.
iterator_range< regclass_iterator > regclasses() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:556
static bool isRegDefEarlyClobberKind(unsigned Flag)
Definition: InlineAsm.h:278
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:635
unsigned getNumRegClasses() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:233
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
SUnit * OrigNode
If not this, the node from which this node was cloned.
Definition: ScheduleDAG.h:255
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler...
bool isPending
True once pending.
Definition: ScheduleDAG.h:287
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:162
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
static bool isRegDefKind(unsigned Flag)
Definition: InlineAsm.h:275
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
TargetInstrInfo - Interface to description of machine instruction set.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit *> LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask, and add them to LRegs.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:802
Scheduling dependency.
Definition: ScheduleDAG.h:50
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
void setHeightToAtLeast(unsigned NewHeight)
If NewDepth is greater than this node&#39;s depth value, set it to be the new height value.
bool isAvailable()
Definition: Compression.cpp:58
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
const MCPhysReg * getImplicitDefs() const
Return a list of registers that are potentially written by any instance of this machine instruction...
Definition: MCInstrDesc.h:534
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
Machine Value Type.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle...
bool isOptionalDef() const
Set if this operand is a optional def.
Definition: MCInstrDesc.h:99
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:238
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
virtual void addNode(const SUnit *SU)=0
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
virtual bool empty() const =0
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
iterator_range< value_op_iterator > op_values() const
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
const SDValue & getOperand(unsigned Num) const
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
static bool isOperandOf(const SUnit *SU, SDNode *N)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:173
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag...
Definition: InlineAsm.h:336
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:279
unsigned getCallFrameSetupOpcode() const
These methods return the opcode of the frame setup/destroy instructions if they exist (-1 otherwise)...
MCRegAliasIterator enumerates all registers aliasing Reg.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU&#39;s physical register defs...
static void resetVRegCycle(SUnit *SU)
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle...
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:295
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
RegDefIter - In place iteration over the values defined by an SUnit.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:835
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:640
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:187
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
virtual void remove(SUnit *SU)=0
bool regsOverlap(unsigned regA, unsigned regB) const
Returns true if the two registers are equal or alias each other.
static void initVRegCycle(SUnit *SU)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:309
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:552
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:281
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
TargetSubtargetInfo - Generic base class for all target subtargets.
int getNodeId() const
Return the unique node id.
bool isAvailable
True once available.
Definition: ScheduleDAG.h:288
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:270
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
unsigned short NumRegDefsLeft
of reg defs with no scheduled use.
Definition: ScheduleDAG.h:277
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII)
getPhysicalRegisterVT - Returns the ValueType of the physical register definition of the specified no...
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:530
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
#define DEBUG(X)
Definition: Debug.h:118
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:291
void dumpAll(const ScheduleDAG *G) const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:286
virtual SUnit * pop()=0
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:444
virtual HazardType getHazardType(SUnit *m, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
This file describes how to lower LLVM code to machine code.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:873
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.