ScheduleDAGRRList.cpp (LLVM 6.0.0svn)
1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "ScheduleDAGSDNodes.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24 #include "llvm/CodeGen/SchedulerRegistry.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetLowering.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Target/TargetSubtargetInfo.h"
35 #include <climits>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "pre-RA-sched"
39 
40 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
41 STATISTIC(NumUnfolds, "Number of nodes unfolded");
42 STATISTIC(NumDups, "Number of duplicated nodes");
43 STATISTIC(NumPRCopies, "Number of physical register copies");
44 
45 static RegisterScheduler
46  burrListDAGScheduler("list-burr",
47  "Bottom-up register reduction list scheduling",
48  createBURRListDAGScheduler);
49 static RegisterScheduler
50  sourceListDAGScheduler("source",
51  "Similar to list-burr but schedules in source "
52  "order when possible",
53  createSourceListDAGScheduler);
54 
55 static RegisterScheduler
56  hybridListDAGScheduler("list-hybrid",
57  "Bottom-up register pressure aware list scheduling "
58  "which tries to balance latency and register pressure",
59  createHybridListDAGScheduler);
60 
61 static RegisterScheduler
62  ILPListDAGScheduler("list-ilp",
63  "Bottom-up register pressure aware list scheduling "
64  "which tries to balance ILP and register pressure",
65  createILPListDAGScheduler);
66 
67 static cl::opt<bool> DisableSchedCycles(
68  "disable-sched-cycles", cl::Hidden, cl::init(false),
69  cl::desc("Disable cycle-level precision during preRA scheduling"));
70 
71 // Temporary sched=list-ilp flags until the heuristics are robust.
72 // Some options are also available under sched=list-hybrid.
73 static cl::opt<bool> DisableSchedRegPressure(
74  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
75  cl::desc("Disable regpressure priority in sched=list-ilp"));
76 static cl::opt<bool> DisableSchedLiveUses(
77  "disable-sched-live-uses", cl::Hidden, cl::init(true),
78  cl::desc("Disable live use priority in sched=list-ilp"));
79 static cl::opt<bool> DisableSchedVRegCycle(
80  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
81  cl::desc("Disable virtual register cycle interference checks"));
82 static cl::opt<bool> DisableSchedPhysRegJoin(
83  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
84  cl::desc("Disable physreg def-use affinity"));
85 static cl::opt<bool> DisableSchedStalls(
86  "disable-sched-stalls", cl::Hidden, cl::init(true),
87  cl::desc("Disable no-stall priority in sched=list-ilp"));
88 static cl::opt<bool> DisableSchedCriticalPath(
89  "disable-sched-critical-path", cl::Hidden, cl::init(false),
90  cl::desc("Disable critical path priority in sched=list-ilp"));
91 static cl::opt<bool> DisableSchedHeight(
92  "disable-sched-height", cl::Hidden, cl::init(false),
93  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
94 static cl::opt<bool> Disable2AddrHack(
95  "disable-2addr-hack", cl::Hidden, cl::init(true),
96  cl::desc("Disable scheduler's two-address hack"));
97 
98 static cl::opt<int> MaxReorderWindow(
99  "max-sched-reorder", cl::Hidden, cl::init(6),
100  cl::desc("Number of instructions to allow ahead of the critical path "
101  "in sched=list-ilp"));
102 
103 static cl::opt<unsigned> AvgIPC(
104  "sched-avg-ipc", cl::Hidden, cl::init(1),
105  cl::desc("Average inst/cycle when no target itinerary exists."));
106 
107 namespace {
108 //===----------------------------------------------------------------------===//
109 /// ScheduleDAGRRList - The actual register reduction list scheduler
110 /// implementation. This supports both top-down and bottom-up scheduling.
111 ///
112 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
113 private:
114  /// NeedLatency - True if the scheduler will make use of latency information.
115  ///
116  bool NeedLatency;
117 
118  /// AvailableQueue - The priority queue to use for the available SUnits.
119  SchedulingPriorityQueue *AvailableQueue;
120 
121  /// PendingQueue - This contains all of the instructions whose operands have
122  /// been issued, but their results are not ready yet (due to the latency of
123  /// the operation). Once the operands become available, the instruction is
124  /// added to the AvailableQueue.
125  std::vector<SUnit*> PendingQueue;
126 
127  /// HazardRec - The hazard recognizer to use.
128  ScheduleHazardRecognizer *HazardRec;
129 
130  /// CurCycle - The current scheduler state corresponds to this cycle.
131  unsigned CurCycle;
132 
133  /// MinAvailableCycle - Cycle of the soonest available instruction.
134  unsigned MinAvailableCycle;
135 
136  /// IssueCount - Count instructions issued in this cycle
137  /// Currently valid only for bottom-up scheduling.
138  unsigned IssueCount;
139 
140  /// LiveRegDefs - A set of physical registers and their definitions
141  /// that are "live". These nodes must be scheduled before any other nodes that
142  /// modify the registers can be scheduled.
143  unsigned NumLiveRegs;
144  std::unique_ptr<SUnit*[]> LiveRegDefs;
145  std::unique_ptr<SUnit*[]> LiveRegGens;
146 
147  // Collect interferences between physical register use/defs.
148  // Each interference is an SUnit and set of physical registers.
149  SmallVector<SUnit*, 4> Interferences;
150  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
151  LRegsMapT LRegsMap;
152 
153  /// Topo - A topological ordering for SUnits which permits fast IsReachable
154  /// and similar queries.
155  ScheduleDAGTopologicalSort Topo;
156 
157  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
158  // DAG crawling.
159  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
160 
161 public:
162  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
163  SchedulingPriorityQueue *availqueue,
164  CodeGenOpt::Level OptLevel)
165  : ScheduleDAGSDNodes(mf),
166  NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
167  Topo(SUnits, nullptr) {
168 
169  const TargetSubtargetInfo &STI = mf.getSubtarget();
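  // If cycle-level precision is disabled or latency information is not needed,
  // the default ScheduleHazardRecognizer is used; it has no lookahead and never
  // reports a hazard.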
170  if (DisableSchedCycles || !NeedLatency)
171  HazardRec = new ScheduleHazardRecognizer();
172  else
173  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
174  }
175 
176  ~ScheduleDAGRRList() override {
177  delete HazardRec;
178  delete AvailableQueue;
179  }
180 
181  void Schedule() override;
182 
183  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
184 
185  /// IsReachable - Checks if SU is reachable from TargetSU.
186  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
187  return Topo.IsReachable(SU, TargetSU);
188  }
189 
190  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
191  /// create a cycle.
192  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
193  return Topo.WillCreateCycle(SU, TargetSU);
194  }
195 
196  /// AddPred - adds a predecessor edge to SUnit SU.
197  /// This returns true if this is a new predecessor.
198  /// Updates the topological ordering if required.
199  void AddPred(SUnit *SU, const SDep &D) {
200  Topo.AddPred(SU, D.getSUnit());
201  SU->addPred(D);
202  }
203 
204  /// RemovePred - removes a predecessor edge from SUnit SU.
205  /// This returns true if an edge was removed.
206  /// Updates the topological ordering if required.
207  void RemovePred(SUnit *SU, const SDep &D) {
208  Topo.RemovePred(SU, D.getSUnit());
209  SU->removePred(D);
210  }
211 
212 private:
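  // A node counts as ready unless cycle-level scheduling is enabled and the
  // queue's ready filter rejects it.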
213  bool isReady(SUnit *SU) {
214  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
215  AvailableQueue->isReady(SU);
216  }
217 
218  void ReleasePred(SUnit *SU, const SDep *PredEdge);
219  void ReleasePredecessors(SUnit *SU);
220  void ReleasePending();
221  void AdvanceToCycle(unsigned NextCycle);
222  void AdvancePastStalls(SUnit *SU);
223  void EmitNode(SUnit *SU);
224  void ScheduleNodeBottomUp(SUnit*);
225  void CapturePred(SDep *PredEdge);
226  void UnscheduleNodeBottomUp(SUnit*);
227  void RestoreHazardCheckerBottomUp();
228  void BacktrackBottomUp(SUnit*, SUnit*);
229  SUnit *TryUnfoldSU(SUnit *);
230  SUnit *CopyAndMoveSuccessors(SUnit*);
231  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
232  const TargetRegisterClass*,
233  const TargetRegisterClass*,
234  SmallVectorImpl<SUnit*> &Copies);
235  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
236 
237  void releaseInterferences(unsigned Reg = 0);
238 
239  SUnit *PickNodeToScheduleBottomUp();
240  void ListScheduleBottomUp();
241 
242  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
243  /// Updates the topological ordering if required.
244  SUnit *CreateNewSUnit(SDNode *N) {
245  unsigned NumSUnits = SUnits.size();
246  SUnit *NewNode = newSUnit(N);
247  // Update the topological ordering.
248  if (NewNode->NodeNum >= NumSUnits)
249  Topo.InitDAGTopologicalSorting();
250  return NewNode;
251  }
252 
253  /// CreateClone - Creates a new SUnit from an existing one.
254  /// Updates the topological ordering if required.
255  SUnit *CreateClone(SUnit *N) {
256  unsigned NumSUnits = SUnits.size();
257  SUnit *NewNode = Clone(N);
258  // Update the topological ordering.
259  if (NewNode->NodeNum >= NumSUnits)
260  Topo.InitDAGTopologicalSorting();
261  return NewNode;
262  }
263 
264  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
265  /// need actual latency information but the hybrid scheduler does.
266  bool forceUnitLatencies() const override {
267  return !NeedLatency;
268  }
269 };
270 } // end anonymous namespace
271 
272 /// GetCostForDef - Looks up the register class and cost for a given definition.
273 /// Typically this just means looking up the representative register class,
274 /// but for untyped values (MVT::Untyped) it means inspecting the node's
275 /// opcode to determine what register class is being generated.
276 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
277  const TargetLowering *TLI,
278  const TargetInstrInfo *TII,
279  const TargetRegisterInfo *TRI,
280  unsigned &RegClass, unsigned &Cost,
281  const MachineFunction &MF) {
282  MVT VT = RegDefPos.GetValue();
283 
284  // Special handling for untyped values. These values can only come from
285  // the expansion of custom DAG-to-DAG patterns.
286  if (VT == MVT::Untyped) {
287  const SDNode *Node = RegDefPos.GetNode();
288 
289  // Special handling for CopyFromReg of untyped values.
290  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
291  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
292  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
293  RegClass = RC->getID();
294  Cost = 1;
295  return;
296  }
297 
298  unsigned Opcode = Node->getMachineOpcode();
299  if (Opcode == TargetOpcode::REG_SEQUENCE) {
300  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
301  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
302  RegClass = RC->getID();
303  Cost = 1;
304  return;
305  }
306 
307  unsigned Idx = RegDefPos.GetIdx();
308  const MCInstrDesc Desc = TII->get(Opcode);
309  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
310  RegClass = RC->getID();
311  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
312  // better way to determine it.
313  Cost = 1;
314  } else {
315  RegClass = TLI->getRepRegClassFor(VT)->getID();
316  Cost = TLI->getRepRegClassCostFor(VT);
317  }
318 }
319 
320 /// Schedule - Schedule the DAG using list scheduling.
321 void ScheduleDAGRRList::Schedule() {
322  DEBUG(dbgs()
323  << "********** List Scheduling BB#" << BB->getNumber()
324  << " '" << BB->getName() << "' **********\n");
325 
326  CurCycle = 0;
327  IssueCount = 0;
328  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
329  NumLiveRegs = 0;
330  // Allocate slots for each physical register, plus one for a special register
331  // to track the virtual resource of a calling sequence.
332  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
333  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
334  CallSeqEndForStart.clear();
335  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
336 
337  // Build the scheduling graph.
338  BuildSchedGraph(nullptr);
339 
340  DEBUG(for (SUnit &SU : SUnits)
341  SU.dumpAll(this));
342  Topo.InitDAGTopologicalSorting();
343 
344  AvailableQueue->initNodes(SUnits);
345 
346  HazardRec->Reset();
347 
348  // Execute the actual scheduling loop.
349  ListScheduleBottomUp();
350 
351  AvailableQueue->releaseState();
352 
353  DEBUG({
354  dbgs() << "*** Final schedule ***\n";
355  dumpSchedule();
356  dbgs() << '\n';
357  });
358 }
359 
360 //===----------------------------------------------------------------------===//
361 // Bottom-Up Scheduling
362 //===----------------------------------------------------------------------===//
363 
364 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
365 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
366 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
367  SUnit *PredSU = PredEdge->getSUnit();
368 
369 #ifndef NDEBUG
370  if (PredSU->NumSuccsLeft == 0) {
371  dbgs() << "*** Scheduling failed! ***\n";
372  PredSU->dump(this);
373  dbgs() << " has been released too many times!\n";
374  llvm_unreachable(nullptr);
375  }
376 #endif
377  --PredSU->NumSuccsLeft;
378 
379  if (!forceUnitLatencies()) {
380  // Updating predecessor's height. This is now the cycle when the
381  // predecessor can be scheduled without causing a pipeline stall.
382  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
383  }
384 
385  // If all the node's successors are scheduled, this node is ready
386  // to be scheduled. Ignore the special EntrySU node.
387  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
388  PredSU->isAvailable = true;
389 
390  unsigned Height = PredSU->getHeight();
391  if (Height < MinAvailableCycle)
392  MinAvailableCycle = Height;
393 
394  if (isReady(PredSU)) {
395  AvailableQueue->push(PredSU);
396  }
397  // CapturePred and others may have left the node in the pending queue, avoid
398  // adding it twice.
399  else if (!PredSU->isPending) {
400  PredSU->isPending = true;
401  PendingQueue.push_back(PredSU);
402  }
403  }
404 }
405 
406 /// IsChainDependent - Test if Outer is reachable from Inner through
407 /// chain dependencies.
408 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
409  unsigned NestLevel,
410  const TargetInstrInfo *TII) {
411  SDNode *N = Outer;
412  for (;;) {
413  if (N == Inner)
414  return true;
415  // For a TokenFactor, examine each operand. There may be multiple ways
416  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
417  // most nesting in order to ensure that we find the corresponding match.
418  if (N->getOpcode() == ISD::TokenFactor) {
419  for (const SDValue &Op : N->op_values())
420  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
421  return true;
422  return false;
423  }
424  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
425  if (N->isMachineOpcode()) {
426  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
427  ++NestLevel;
428  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
429  if (NestLevel == 0)
430  return false;
431  --NestLevel;
432  }
433  }
434  // Otherwise, find the chain and continue climbing.
435  for (const SDValue &Op : N->op_values())
436  if (Op.getValueType() == MVT::Other) {
437  N = Op.getNode();
438  goto found_chain_operand;
439  }
440  return false;
441  found_chain_operand:;
442  if (N->getOpcode() == ISD::EntryToken)
443  return false;
444  }
445 }
446 
447 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
448 /// the corresponding (lowered) CALLSEQ_BEGIN node.
449 ///
450 /// NestLevel and MaxNest are used in recursion to indicate the current level
451 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
452 /// level seen so far.
453 ///
454 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
455 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
456 static SDNode *
457 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
458  const TargetInstrInfo *TII) {
459  for (;;) {
460  // For a TokenFactor, examine each operand. There may be multiple ways
461  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
462  // most nesting in order to ensure that we find the corresponding match.
463  if (N->getOpcode() == ISD::TokenFactor) {
464  SDNode *Best = nullptr;
465  unsigned BestMaxNest = MaxNest;
466  for (const SDValue &Op : N->op_values()) {
467  unsigned MyNestLevel = NestLevel;
468  unsigned MyMaxNest = MaxNest;
469  if (SDNode *New = FindCallSeqStart(Op.getNode(),
470  MyNestLevel, MyMaxNest, TII))
471  if (!Best || (MyMaxNest > BestMaxNest)) {
472  Best = New;
473  BestMaxNest = MyMaxNest;
474  }
475  }
476  assert(Best);
477  MaxNest = BestMaxNest;
478  return Best;
479  }
480  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
481  if (N->isMachineOpcode()) {
482  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
483  ++NestLevel;
484  MaxNest = std::max(MaxNest, NestLevel);
485  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
486  assert(NestLevel != 0);
487  --NestLevel;
488  if (NestLevel == 0)
489  return N;
490  }
491  }
492  // Otherwise, find the chain and continue climbing.
493  for (const SDValue &Op : N->op_values())
494  if (Op.getValueType() == MVT::Other) {
495  N = Op.getNode();
496  goto found_chain_operand;
497  }
498  return nullptr;
499  found_chain_operand:;
500  if (N->getOpcode() == ISD::EntryToken)
501  return nullptr;
502  }
503 }
504 
505 /// Call ReleasePred for each predecessor, then update register live def/gen.
506 /// Always update LiveRegDefs for a register dependence even if the current SU
507 /// also defines the register. This effectively creates one large live range
508 /// across a sequence of two-address nodes. This is important because the
509 /// entire chain must be scheduled together. Example:
510 ///
511 /// flags = (3) add
512 /// flags = (2) addc flags
513 /// flags = (1) addc flags
514 ///
515 /// results in
516 ///
517 /// LiveRegDefs[flags] = 3
518 /// LiveRegGens[flags] = 1
519 ///
520 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
521 /// interference on flags.
522 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
523  // Bottom up: release predecessors
524  for (SDep &Pred : SU->Preds) {
525  ReleasePred(SU, &Pred);
526  if (Pred.isAssignedRegDep()) {
527  // This is a physical register dependency and it's impossible or
528  // expensive to copy the register. Make sure nothing that can
529  // clobber the register is scheduled between the predecessor and
530  // this node.
531  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
532  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
533  "interference on register dependence");
534  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
535  if (!LiveRegGens[Pred.getReg()]) {
536  ++NumLiveRegs;
537  LiveRegGens[Pred.getReg()] = SU;
538  }
539  }
540  }
541 
542  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
543  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
544  // these nodes, to prevent other calls from being interscheduled with them.
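  // CallResource indexes the extra LiveRegDefs/LiveRegGens slot (one past the
  // last physical register) reserved in Schedule() for the calling sequence.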
545  unsigned CallResource = TRI->getNumRegs();
546  if (!LiveRegDefs[CallResource])
547  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
548  if (Node->isMachineOpcode() &&
549  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
550  unsigned NestLevel = 0;
551  unsigned MaxNest = 0;
552  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
553 
554  SUnit *Def = &SUnits[N->getNodeId()];
555  CallSeqEndForStart[Def] = SU;
556 
557  ++NumLiveRegs;
558  LiveRegDefs[CallResource] = Def;
559  LiveRegGens[CallResource] = SU;
560  break;
561  }
562 }
563 
564 /// Check to see if any of the pending instructions are ready to issue. If
565 /// so, add them to the available queue.
566 void ScheduleDAGRRList::ReleasePending() {
567  if (DisableSchedCycles) {
568  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
569  return;
570  }
571 
572  // If the available queue is empty, it is safe to reset MinAvailableCycle.
573  if (AvailableQueue->empty())
574  MinAvailableCycle = UINT_MAX;
575 
576  // Check to see if any of the pending instructions are ready to issue. If
577  // so, add them to the available queue.
578  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
579  unsigned ReadyCycle = PendingQueue[i]->getHeight();
580  if (ReadyCycle < MinAvailableCycle)
581  MinAvailableCycle = ReadyCycle;
582 
583  if (PendingQueue[i]->isAvailable) {
584  if (!isReady(PendingQueue[i]))
585  continue;
586  AvailableQueue->push(PendingQueue[i]);
587  }
588  PendingQueue[i]->isPending = false;
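  // Remove this entry by swapping it with the last element, then revisit the
  // same index.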
589  PendingQueue[i] = PendingQueue.back();
590  PendingQueue.pop_back();
591  --i; --e;
592  }
593 }
594 
595 /// Move the scheduler state forward by the specified number of Cycles.
596 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
597  if (NextCycle <= CurCycle)
598  return;
599 
600  IssueCount = 0;
601  AvailableQueue->setCurCycle(NextCycle);
602  if (!HazardRec->isEnabled()) {
603  // Bypass lots of virtual calls in case of long latency.
604  CurCycle = NextCycle;
605  }
606  else {
607  for (; CurCycle != NextCycle; ++CurCycle) {
608  HazardRec->RecedeCycle();
609  }
610  }
611  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
612  // available Q to release pending nodes at least once before popping.
613  ReleasePending();
614 }
615 
616 /// Move the scheduler state forward until the specified node's dependents are
617 /// ready and can be scheduled with no resource conflicts.
618 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
619  if (DisableSchedCycles)
620  return;
621 
622  // FIXME: Nodes such as CopyFromReg probably should not advance the current
623  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
624  // has predecessors the cycle will be advanced when they are scheduled.
625  // But given the crude nature of modeling latency though such nodes, we
626  // currently need to treat these nodes like real instructions.
627  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
628 
629  unsigned ReadyCycle = SU->getHeight();
630 
631  // Bump CurCycle to account for latency. We assume the latency of other
632  // available instructions may be hidden by the stall (not a full pipe stall).
633  // This updates the hazard recognizer's cycle before reserving resources for
634  // this instruction.
635  AdvanceToCycle(ReadyCycle);
636 
637  // Calls are scheduled in their preceding cycle, so don't conflict with
638  // hazards from instructions after the call. EmitNode will reset the
639  // scoreboard state before emitting the call.
640  if (SU->isCall)
641  return;
642 
643  // FIXME: For resource conflicts in very long non-pipelined stages, we
644  // should probably skip ahead here to avoid useless scoreboard checks.
645  int Stalls = 0;
646  while (true) {
647  ScheduleHazardRecognizer::HazardType HT =
648  HazardRec->getHazardType(SU, -Stalls);
649 
650  if (HT == ScheduleHazardRecognizer::NoHazard)
651  break;
652 
653  ++Stalls;
654  }
655  AdvanceToCycle(CurCycle + Stalls);
656 }
657 
658 /// Record this SUnit in the HazardRecognizer.
659 /// Does not update CurCycle.
660 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
661  if (!HazardRec->isEnabled())
662  return;
663 
664  // Check for phys reg copy.
665  if (!SU->getNode())
666  return;
667 
668  switch (SU->getNode()->getOpcode()) {
669  default:
670  assert(SU->getNode()->isMachineOpcode() &&
671  "This target-independent node should not be scheduled.");
672  break;
673  case ISD::MERGE_VALUES:
674  case ISD::TokenFactor:
675  case ISD::LIFETIME_START:
676  case ISD::LIFETIME_END:
677  case ISD::CopyToReg:
678  case ISD::CopyFromReg:
679  case ISD::EH_LABEL:
680  // Noops don't affect the scoreboard state. Copies are likely to be
681  // removed.
682  return;
683  case ISD::INLINEASM:
684  // For inline asm, clear the pipeline state.
685  HazardRec->Reset();
686  return;
687  }
688  if (SU->isCall) {
689  // Calls are scheduled with their preceding instructions. For bottom-up
690  // scheduling, clear the pipeline state before emitting.
691  HazardRec->Reset();
692  }
693 
694  HazardRec->EmitInstruction(SU);
695 }
696 
697 static void resetVRegCycle(SUnit *SU);
698 
699 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
700 /// count of its predecessors. If a predecessor pending count is zero, add it to
701 /// the Available queue.
702 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
703  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
704  DEBUG(SU->dump(this));
705 
706 #ifndef NDEBUG
707  if (CurCycle < SU->getHeight())
708  DEBUG(dbgs() << " Height [" << SU->getHeight()
709  << "] pipeline stall!\n");
710 #endif
711 
712  // FIXME: Do not modify node height. It may interfere with
713  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
714  // node its ready cycle can aid heuristics, and after scheduling it can
715  // indicate the scheduled cycle.
716  SU->setHeightToAtLeast(CurCycle);
717 
718  // Reserve resources for the scheduled instruction.
719  EmitNode(SU);
720 
721  Sequence.push_back(SU);
722 
723  AvailableQueue->scheduledNode(SU);
724 
725  // If HazardRec is disabled, and each inst counts as one cycle, then
726  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
727  // PendingQueue for schedulers that implement HasReadyFilter.
728  if (!HazardRec->isEnabled() && AvgIPC < 2)
729  AdvanceToCycle(CurCycle + 1);
730 
731  // Update liveness of predecessors before successors to avoid treating a
732  // two-address node as a live range def.
733  ReleasePredecessors(SU);
734 
735  // Release all the implicit physical register defs that are live.
736  for (SDep &Succ : SU->Succs) {
737  // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
738  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
739  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
740  --NumLiveRegs;
741  LiveRegDefs[Succ.getReg()] = nullptr;
742  LiveRegGens[Succ.getReg()] = nullptr;
743  releaseInterferences(Succ.getReg());
744  }
745  }
746  // Release the special call resource dependence, if this is the beginning
747  // of a call.
748  unsigned CallResource = TRI->getNumRegs();
749  if (LiveRegDefs[CallResource] == SU)
750  for (const SDNode *SUNode = SU->getNode(); SUNode;
751  SUNode = SUNode->getGluedNode()) {
752  if (SUNode->isMachineOpcode() &&
753  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
754  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
755  --NumLiveRegs;
756  LiveRegDefs[CallResource] = nullptr;
757  LiveRegGens[CallResource] = nullptr;
758  releaseInterferences(CallResource);
759  }
760  }
761 
762  resetVRegCycle(SU);
763 
764  SU->isScheduled = true;
765 
766  // Conditions under which the scheduler should eagerly advance the cycle:
767  // (1) No available instructions
768  // (2) All pipelines full, so available instructions must have hazards.
769  //
770  // If HazardRec is disabled, the cycle was pre-advanced before calling
771  // ReleasePredecessors. In that case, IssueCount should remain 0.
772  //
773  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
774  if (HazardRec->isEnabled() || AvgIPC > 1) {
775  if (SU->getNode() && SU->getNode()->isMachineOpcode())
776  ++IssueCount;
777  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
778  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
779  AdvanceToCycle(CurCycle + 1);
780  }
781 }
782 
783 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
784 /// unscheduled, increase the succ left count of its predecessors. Remove
785 /// them from AvailableQueue if necessary.
786 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
787  SUnit *PredSU = PredEdge->getSUnit();
788  if (PredSU->isAvailable) {
789  PredSU->isAvailable = false;
790  if (!PredSU->isPending)
791  AvailableQueue->remove(PredSU);
792  }
793 
794  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
795  ++PredSU->NumSuccsLeft;
796 }
797 
798 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
799 /// its predecessors' states to reflect the change.
800 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
801  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
802  DEBUG(SU->dump(this));
803 
804  for (SDep &Pred : SU->Preds) {
805  CapturePred(&Pred);
806  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
807  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
808  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
809  "Physical register dependency violated?");
810  --NumLiveRegs;
811  LiveRegDefs[Pred.getReg()] = nullptr;
812  LiveRegGens[Pred.getReg()] = nullptr;
813  releaseInterferences(Pred.getReg());
814  }
815  }
816 
817  // Reclaim the special call resource dependence, if this is the beginning
818  // of a call.
819  unsigned CallResource = TRI->getNumRegs();
820  for (const SDNode *SUNode = SU->getNode(); SUNode;
821  SUNode = SUNode->getGluedNode()) {
822  if (SUNode->isMachineOpcode() &&
823  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
824  ++NumLiveRegs;
825  LiveRegDefs[CallResource] = SU;
826  LiveRegGens[CallResource] = CallSeqEndForStart[SU];
827  }
828  }
829 
830  // Release the special call resource dependence, if this is the end
831  // of a call.
832  if (LiveRegGens[CallResource] == SU)
833  for (const SDNode *SUNode = SU->getNode(); SUNode;
834  SUNode = SUNode->getGluedNode()) {
835  if (SUNode->isMachineOpcode() &&
836  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
837  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
838  --NumLiveRegs;
839  LiveRegDefs[CallResource] = nullptr;
840  LiveRegGens[CallResource] = nullptr;
841  releaseInterferences(CallResource);
842  }
843  }
844 
845  for (auto &Succ : SU->Succs) {
846  if (Succ.isAssignedRegDep()) {
847  auto Reg = Succ.getReg();
848  if (!LiveRegDefs[Reg])
849  ++NumLiveRegs;
850  // This becomes the nearest def. Note that an earlier def may still be
851  // pending if this is a two-address node.
852  LiveRegDefs[Reg] = SU;
853 
854  // Update LiveRegGens only if it was empty before this unscheduling.
855  // This avoids incorrectly overwriting an entry set by a previous run.
856  if (!LiveRegGens[Reg]) {
857  // Find the successor with the lowest height.
858  LiveRegGens[Reg] = Succ.getSUnit();
859  for (auto &Succ2 : SU->Succs) {
860  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
861  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
862  LiveRegGens[Reg] = Succ2.getSUnit();
863  }
864  }
865  }
866  }
867  if (SU->getHeight() < MinAvailableCycle)
868  MinAvailableCycle = SU->getHeight();
869 
870  SU->setHeightDirty();
871  SU->isScheduled = false;
872  SU->isAvailable = true;
873  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
874  // Don't make available until backtracking is complete.
875  SU->isPending = true;
876  PendingQueue.push_back(SU);
877  }
878  else {
879  AvailableQueue->push(SU);
880  }
881  AvailableQueue->unscheduledNode(SU);
882 }
883 
884 /// After backtracking, the hazard checker needs to be restored to a state
885 /// corresponding to the current cycle.
886 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
887  HazardRec->Reset();
888 
889  unsigned LookAhead = std::min((unsigned)Sequence.size(),
890  HazardRec->getMaxLookAhead());
891  if (LookAhead == 0)
892  return;
893 
894  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
895  unsigned HazardCycle = (*I)->getHeight();
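  // Replay the tail of the schedule: recede the scoreboard to each node's
  // cycle, then re-emit the node.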
896  for (auto E = Sequence.end(); I != E; ++I) {
897  SUnit *SU = *I;
898  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
899  HazardRec->RecedeCycle();
900  }
901  EmitNode(SU);
902  }
903 }
904 
905 /// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
906 /// including BtSU, in order to make it possible to schedule SU.
907 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
908  SUnit *OldSU = Sequence.back();
909  while (true) {
910  Sequence.pop_back();
911  // FIXME: use ready cycle instead of height
912  CurCycle = OldSU->getHeight();
913  UnscheduleNodeBottomUp(OldSU);
914  AvailableQueue->setCurCycle(CurCycle);
915  if (OldSU == BtSU)
916  break;
917  OldSU = Sequence.back();
918  }
919 
920  assert(!SU->isSucc(OldSU) && "Something is wrong!");
921 
922  RestoreHazardCheckerBottomUp();
923 
924  ReleasePending();
925 
926  ++NumBacktracks;
927 }
928 
929 static bool isOperandOf(const SUnit *SU, SDNode *N) {
930  for (const SDNode *SUNode = SU->getNode(); SUNode;
931  SUNode = SUNode->getGluedNode()) {
932  if (SUNode->isOperandOf(N))
933  return true;
934  }
935  return false;
936 }
937 
938 /// TryUnfoldSU - Attempt to unfold SU's node into a separate load and operation.
939 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
940  SDNode *N = SU->getNode();
941  // Use while over if to ease fall through.
942  SmallVector<SDNode *, 2> NewNodes;
943  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
944  return nullptr;
945 
946  // Unfolding an x86 DEC64m operation results in a store, dec, and load, which
947  // can't be handled here, so quit.
948  if (NewNodes.size() == 3)
949  return nullptr;
950 
951  assert(NewNodes.size() == 2 && "Expected a load folding node!");
952 
953  N = NewNodes[1];
954  SDNode *LoadNode = NewNodes[0];
955  unsigned NumVals = N->getNumValues();
956  unsigned OldNumVals = SU->getNode()->getNumValues();
957 
958  // LoadNode may already exist. This can happen when there is another
959  // load from the same location, producing the same type of value,
960  // but with different alignment or volatility.
961  bool isNewLoad = true;
962  SUnit *LoadSU;
963  if (LoadNode->getNodeId() != -1) {
964  LoadSU = &SUnits[LoadNode->getNodeId()];
965  // If LoadSU has already been scheduled, we should clone it but
966  // this would negate the benefit to unfolding so just return SU.
967  if (LoadSU->isScheduled)
968  return SU;
969  isNewLoad = false;
970  } else {
971  LoadSU = CreateNewSUnit(LoadNode);
972  LoadNode->setNodeId(LoadSU->NodeNum);
973 
974  InitNumRegDefsLeft(LoadSU);
975  computeLatency(LoadSU);
976  }
977 
978  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
979 
980  // Now that we are committed to unfolding, replace the DAG uses.
981  for (unsigned i = 0; i != NumVals; ++i)
982  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
983  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
984  SDValue(LoadNode, 1));
985 
986  SUnit *NewSU = CreateNewSUnit(N);
987  assert(N->getNodeId() == -1 && "Node already inserted!");
988  N->setNodeId(NewSU->NodeNum);
989 
990  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
991  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
992  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
993  NewSU->isTwoAddress = true;
994  break;
995  }
996  }
997  if (MCID.isCommutable())
998  NewSU->isCommutable = true;
999 
1000  InitNumRegDefsLeft(NewSU);
1001  computeLatency(NewSU);
1002 
1003  // Record all the edges to and from the old SU, by category.
1004  SmallVector<SDep, 4> ChainPreds;
1005  SmallVector<SDep, 4> ChainSuccs;
1006  SmallVector<SDep, 4> LoadPreds;
1007  SmallVector<SDep, 4> NodePreds;
1008  SmallVector<SDep, 4> NodeSuccs;
1009  for (SDep &Pred : SU->Preds) {
1010  if (Pred.isCtrl())
1011  ChainPreds.push_back(Pred);
1012  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1013  LoadPreds.push_back(Pred);
1014  else
1015  NodePreds.push_back(Pred);
1016  }
1017  for (SDep &Succ : SU->Succs) {
1018  if (Succ.isCtrl())
1019  ChainSuccs.push_back(Succ);
1020  else
1021  NodeSuccs.push_back(Succ);
1022  }
1023 
1024  // Now assign edges to the newly-created nodes.
1025  for (const SDep &Pred : ChainPreds) {
1026  RemovePred(SU, Pred);
1027  if (isNewLoad)
1028  AddPred(LoadSU, Pred);
1029  }
1030  for (const SDep &Pred : LoadPreds) {
1031  RemovePred(SU, Pred);
1032  if (isNewLoad)
1033  AddPred(LoadSU, Pred);
1034  }
1035  for (const SDep &Pred : NodePreds) {
1036  RemovePred(SU, Pred);
1037  AddPred(NewSU, Pred);
1038  }
1039  for (SDep D : NodeSuccs) {
1040  SUnit *SuccDep = D.getSUnit();
1041  D.setSUnit(SU);
1042  RemovePred(SuccDep, D);
1043  D.setSUnit(NewSU);
1044  AddPred(SuccDep, D);
1045  // Balance register pressure.
1046  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1047  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1048  --NewSU->NumRegDefsLeft;
1049  }
1050  for (SDep D : ChainSuccs) {
1051  SUnit *SuccDep = D.getSUnit();
1052  D.setSUnit(SU);
1053  RemovePred(SuccDep, D);
1054  if (isNewLoad) {
1055  D.setSUnit(LoadSU);
1056  AddPred(SuccDep, D);
1057  }
1058  }
1059 
1060  // Add a data dependency to reflect that NewSU reads the value defined
1061  // by LoadSU.
1062  SDep D(LoadSU, SDep::Data, 0);
1063  D.setLatency(LoadSU->Latency);
1064  AddPred(NewSU, D);
1065 
1066  if (isNewLoad)
1067  AvailableQueue->addNode(LoadSU);
1068  AvailableQueue->addNode(NewSU);
1069 
1070  ++NumUnfolds;
1071 
1072  if (NewSU->NumSuccsLeft == 0)
1073  NewSU->isAvailable = true;
1074 
1075  return NewSU;
1076 }
1077 
1078 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1079 /// successors to the newly created node.
1080 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1081  SDNode *N = SU->getNode();
1082  if (!N)
1083  return nullptr;
1084 
1085  if (SU->getNode()->getGluedNode())
1086  return nullptr;
1087 
1088  SUnit *NewSU;
1089  bool TryUnfold = false;
1090  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1091  MVT VT = N->getSimpleValueType(i);
1092  if (VT == MVT::Glue)
1093  return nullptr;
1094  else if (VT == MVT::Other)
1095  TryUnfold = true;
1096  }
1097  for (const SDValue &Op : N->op_values()) {
1098  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1099  if (VT == MVT::Glue)
1100  return nullptr;
1101  }
1102 
1103  // If possible unfold instruction.
1104  if (TryUnfold) {
1105  SUnit *UnfoldSU = TryUnfoldSU(SU);
1106  if (!UnfoldSU)
1107  return nullptr;
1108  SU = UnfoldSU;
1109  N = SU->getNode();
1110  // If this can be scheduled, don't bother duplicating; just return it.
1111  if (SU->NumSuccsLeft == 0)
1112  return SU;
1113  }
1114 
1115  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1116  NewSU = CreateClone(SU);
1117 
1118  // New SUnit has the exact same predecessors.
1119  for (SDep &Pred : SU->Preds)
1120  if (!Pred.isArtificial())
1121  AddPred(NewSU, Pred);
1122 
1123  // Only copy scheduled successors. Cut them from old node's successor
1124  // list and move them over.
1125  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1126  for (SDep &Succ : SU->Succs) {
1127  if (Succ.isArtificial())
1128  continue;
1129  SUnit *SuccSU = Succ.getSUnit();
1130  if (SuccSU->isScheduled) {
1131  SDep D = Succ;
1132  D.setSUnit(NewSU);
1133  AddPred(SuccSU, D);
1134  D.setSUnit(SU);
1135  DelDeps.push_back(std::make_pair(SuccSU, D));
1136  }
1137  }
1138  for (auto &DelDep : DelDeps)
1139  RemovePred(DelDep.first, DelDep.second);
1140 
1141  AvailableQueue->updateNode(SU);
1142  AvailableQueue->addNode(NewSU);
1143 
1144  ++NumDups;
1145  return NewSU;
1146 }
1147 
1148 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1149 /// scheduled successors of the given SUnit to the last copy.
1150 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1151  const TargetRegisterClass *DestRC,
1152  const TargetRegisterClass *SrcRC,
1153  SmallVectorImpl<SUnit*> &Copies) {
1154  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1155  CopyFromSU->CopySrcRC = SrcRC;
1156  CopyFromSU->CopyDstRC = DestRC;
1157 
1158  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1159  CopyToSU->CopySrcRC = DestRC;
1160  CopyToSU->CopyDstRC = SrcRC;
1161 
1162  // Only copy scheduled successors. Cut them from old node's successor
1163  // list and move them over.
1164  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1165  for (SDep &Succ : SU->Succs) {
1166  if (Succ.isArtificial())
1167  continue;
1168  SUnit *SuccSU = Succ.getSUnit();
1169  if (SuccSU->isScheduled) {
1170  SDep D = Succ;
1171  D.setSUnit(CopyToSU);
1172  AddPred(SuccSU, D);
1173  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1174  }
1175  else {
1176  // Avoid scheduling the def-side copy before other successors. Otherwise
1177  // we could introduce another physreg interference on the copy and
1178  // continue inserting copies indefinitely.
1179  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1180  }
1181  }
1182  for (auto &DelDep : DelDeps)
1183  RemovePred(DelDep.first, DelDep.second);
1184 
1185  SDep FromDep(SU, SDep::Data, Reg);
1186  FromDep.setLatency(SU->Latency);
1187  AddPred(CopyFromSU, FromDep);
1188  SDep ToDep(CopyFromSU, SDep::Data, 0);
1189  ToDep.setLatency(CopyFromSU->Latency);
1190  AddPred(CopyToSU, ToDep);
1191 
1192  AvailableQueue->updateNode(SU);
1193  AvailableQueue->addNode(CopyFromSU);
1194  AvailableQueue->addNode(CopyToSU);
1195  Copies.push_back(CopyFromSU);
1196  Copies.push_back(CopyToSU);
1197 
1198  ++NumPRCopies;
1199 }
1200 
1201 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1202 /// definition of the specified node.
1203 /// FIXME: Move to SelectionDAG?
1204 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1205  const TargetInstrInfo *TII) {
1206  unsigned NumRes;
1207  if (N->getOpcode() == ISD::CopyFromReg) {
1208  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1209  NumRes = 1;
1210  } else {
1211  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1212  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1213  NumRes = MCID.getNumDefs();
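  // Skip past the explicit defs and any implicit defs that precede Reg; the
  // corresponding result value follows them.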
1214  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1215  if (Reg == *ImpDef)
1216  break;
1217  ++NumRes;
1218  }
1219  }
1220  return N->getSimpleValueType(NumRes);
1221 }
1222 
1223 /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
1224 /// specified register def of the specified SUnit clobbers any "live" registers.
1225 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1226  SUnit **LiveRegDefs,
1227  SmallSet<unsigned, 4> &RegAdded,
1228  SmallVectorImpl<unsigned> &LRegs,
1229  const TargetRegisterInfo *TRI) {
1230  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1231 
1232  // Check if Reg is live.
1233  if (!LiveRegDefs[*AliasI]) continue;
1234 
1235  // Allow multiple uses of the same def.
1236  if (LiveRegDefs[*AliasI] == SU) continue;
1237 
1238  // Add Reg to the set of interfering live regs.
1239  if (RegAdded.insert(*AliasI).second) {
1240  LRegs.push_back(*AliasI);
1241  }
1242  }
1243 }
1244 
1245 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1246 /// by RegMask, and add them to LRegs.
1247 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1248  ArrayRef<SUnit*> LiveRegDefs,
1249  SmallSet<unsigned, 4> &RegAdded,
1250  SmallVectorImpl<unsigned> &LRegs) {
1251  // Look at all live registers. Skip Reg0 and the special CallResource.
1252  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1253  if (!LiveRegDefs[i]) continue;
1254  if (LiveRegDefs[i] == SU) continue;
1255  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1256  if (RegAdded.insert(i).second)
1257  LRegs.push_back(i);
1258  }
1259 }
1260 
1261 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1262 static const uint32_t *getNodeRegMask(const SDNode *N) {
1263  for (const SDValue &Op : N->op_values())
1264  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1265  return RegOp->getRegMask();
1266  return nullptr;
1267 }
1268 
1269 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1270 /// scheduling of the given node to satisfy live physical register dependencies.
1271 /// If the specific node is the last one that's available to schedule, do
1272 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1273 bool ScheduleDAGRRList::
1274 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1275  if (NumLiveRegs == 0)
1276  return false;
1277 
1278  SmallSet<unsigned, 4> RegAdded;
1279  // If this node would clobber any "live" register, then it's not ready.
1280  //
1281  // If SU is the currently live definition of the same register that it uses,
1282  // then we are free to schedule it.
1283  for (SDep &Pred : SU->Preds) {
1284  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1285  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1286  RegAdded, LRegs, TRI);
1287  }
1288 
1289  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1290  if (Node->getOpcode() == ISD::INLINEASM) {
1291  // Inline asm can clobber physical defs.
1292  unsigned NumOps = Node->getNumOperands();
1293  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1294  --NumOps; // Ignore the glue operand.
1295 
1296  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1297  unsigned Flags =
1298  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1299  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1300 
1301  ++i; // Skip the ID value.
1302  if (InlineAsm::isRegDefKind(Flags) ||
1303  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1304  InlineAsm::isClobberKind(Flags)) {
1305  // Check for def of register or earlyclobber register.
1306  for (; NumVals; --NumVals, ++i) {
1307  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1308  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1309  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1310  }
1311  } else
1312  i += NumVals;
1313  }
1314  continue;
1315  }
1316 
1317  if (!Node->isMachineOpcode())
1318  continue;
1319  // If we're in the middle of scheduling a call, don't begin scheduling
1320  // another call. Also, don't allow any physical registers to be live across
1321  // the call.
1322  if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
1323  (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
1324  // Check the special calling-sequence resource.
1325  unsigned CallResource = TRI->getNumRegs();
1326  if (LiveRegDefs[CallResource]) {
1327  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1328  while (SDNode *Glued = Gen->getGluedNode())
1329  Gen = Glued;
1330  if (!IsChainDependent(Gen, Node, 0, TII) &&
1331  RegAdded.insert(CallResource).second)
1332  LRegs.push_back(CallResource);
1333  }
1334  }
1335  if (const uint32_t *RegMask = getNodeRegMask(Node))
1336  CheckForLiveRegDefMasked(SU, RegMask,
1337  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1338  RegAdded, LRegs);
1339 
1340  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1341  if (MCID.hasOptionalDef()) {
1342  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1343  // This operand can be either a def of CPSR, if the S bit is set; or a use
1344  // of %noreg. When the OptionalDef is set to a valid register, we need to
1345  // handle it in the same way as an ImplicitDef.
1346  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1347  if (MCID.OpInfo[i].isOptionalDef()) {
1348  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1349  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1350  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1351  }
1352  }
1353  if (!MCID.ImplicitDefs)
1354  continue;
1355  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1356  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1357  }
1358 
1359  return !LRegs.empty();
1360 }
1361 
1362 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1363  // Add the nodes that aren't ready back onto the available list.
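  // Iterate in reverse so entries can be removed by swapping with the back
  // without disturbing the indices still to be visited.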
1364  for (unsigned i = Interferences.size(); i > 0; --i) {
1365  SUnit *SU = Interferences[i-1];
1366  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1367  if (Reg) {
1368  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1369  if (!is_contained(LRegs, Reg))
1370  continue;
1371  }
1372  SU->isPending = false;
1373  // The interfering node may no longer be available due to backtracking.
1374  // Furthermore, it may have been made available again, in which case it is
1375  // now already in the AvailableQueue.
1376  if (SU->isAvailable && !SU->NodeQueueId) {
1377  DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1378  AvailableQueue->push(SU);
1379  }
1380  if (i < Interferences.size())
1381  Interferences[i-1] = Interferences.back();
1382  Interferences.pop_back();
1383  LRegsMap.erase(LRegsPos);
1384  }
1385 }
1386 
1387 /// Return a node that can be scheduled in this cycle. Requirements:
1388 /// (1) Ready: latency has been satisfied
1389 /// (2) No Hazards: resources are available
1390 /// (3) No Interferences: may unschedule to break register interferences.
1391 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1392  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1393  while (CurSU) {
1394  SmallVector<unsigned, 4> LRegs;
1395  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1396  break;
1397  DEBUG(dbgs() << " Interfering reg " <<
1398  (LRegs[0] == TRI->getNumRegs() ? "CallResource"
1399  : TRI->getName(LRegs[0]))
1400  << " SU #" << CurSU->NodeNum << '\n');
1401  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1402  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1403  if (LRegsPair.second) {
1404  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1405  Interferences.push_back(CurSU);
1406  }
1407  else {
1408  assert(CurSU->isPending && "Interferences are pending");
1409  // Update the interference with current live regs.
1410  LRegsPair.first->second = LRegs;
1411  }
1412  CurSU = AvailableQueue->pop();
1413  }
1414  if (CurSU)
1415  return CurSU;
1416 
1417  // All candidates are delayed due to live physical reg dependencies.
1418  // Try backtracking, code duplication, or inserting cross class copies
1419  // to resolve it.
1420  for (SUnit *TrySU : Interferences) {
1421  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1422 
1423  // Try unscheduling up to the point where it's safe to schedule
1424  // this node.
1425  SUnit *BtSU = nullptr;
1426  unsigned LiveCycle = UINT_MAX;
1427  for (unsigned Reg : LRegs) {
1428  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1429  BtSU = LiveRegGens[Reg];
1430  LiveCycle = BtSU->getHeight();
1431  }
1432  }
1433  if (!WillCreateCycle(TrySU, BtSU)) {
1434  // BacktrackBottomUp mutates Interferences!
1435  BacktrackBottomUp(TrySU, BtSU);
1436 
1437  // Force the current node to be scheduled before the node that
1438  // requires the physical reg dep.
1439  if (BtSU->isAvailable) {
1440  BtSU->isAvailable = false;
1441  if (!BtSU->isPending)
1442  AvailableQueue->remove(BtSU);
1443  }
1444  DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1445  << TrySU->NodeNum << ")\n");
1446  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1447 
1448  // If one or more successors has been unscheduled, then the current
1449  // node is no longer available.
1450  if (!TrySU->isAvailable || !TrySU->NodeQueueId)
1451  CurSU = AvailableQueue->pop();
1452  else {
1453  // Available and in AvailableQueue
1454  AvailableQueue->remove(TrySU);
1455  CurSU = TrySU;
1456  }
1457  // Interferences has been mutated. We must break.
1458  break;
1459  }
1460  }
1461 
1462  if (!CurSU) {
1463  // Can't backtrack. If it's too expensive to copy the value, then try to
1464  // duplicate the nodes that produce these "too expensive to copy"
1465  // values to break the dependency. In case even that doesn't work,
1466  // insert cross class copies.
1467  // If it's not too expensive, i.e. cost != -1, issue copies.
1468  SUnit *TrySU = Interferences[0];
1469  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1470  assert(LRegs.size() == 1 && "Can't handle this yet!");
1471  unsigned Reg = LRegs[0];
1472  SUnit *LRDef = LiveRegDefs[Reg];
1473  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1474  const TargetRegisterClass *RC =
1475  TRI->getMinimalPhysRegClass(Reg, VT);
1476  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1477 
1478  // If the cross copy register class is the same as RC, then it must be possible
1479  // to copy the value directly. Do not try to duplicate the def.
1480  // If the cross copy register class is not the same as RC, then it's possible to
1481  // copy the value but it requires cross register class copies and it is
1482  // expensive.
1483  // If cross copy register class is null, then it's not possible to copy
1484  // the value at all.
1485  SUnit *NewDef = nullptr;
1486  if (DestRC != RC) {
1487  NewDef = CopyAndMoveSuccessors(LRDef);
1488  if (!DestRC && !NewDef)
1489  report_fatal_error("Can't handle live physical register dependency!");
1490  }
1491  if (!NewDef) {
1492  // Issue copies, these can be expensive cross register class copies.
1493  SmallVector<SUnit*, 2> Copies;
1494  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1495  DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1496  << " to SU #" << Copies.front()->NodeNum << "\n");
1497  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1498  NewDef = Copies.back();
1499  }
1500 
1501  DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1502  << " to SU #" << TrySU->NodeNum << "\n");
1503  LiveRegDefs[Reg] = NewDef;
1504  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1505  TrySU->isAvailable = false;
1506  CurSU = NewDef;
1507  }
1508  assert(CurSU && "Unable to resolve live physical register dependencies!");
1509  return CurSU;
1510 }
1511 
1512 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1513 /// schedulers.
1514 void ScheduleDAGRRList::ListScheduleBottomUp() {
1515  // Release any predecessors of the special Exit node.
1516  ReleasePredecessors(&ExitSU);
1517 
1518  // Add root to Available queue.
1519  if (!SUnits.empty()) {
1520  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1521  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1522  RootSU->isAvailable = true;
1523  AvailableQueue->push(RootSU);
1524  }
1525 
1526  // While Available queue is not empty, grab the node with the highest
1527  // priority. If it is not ready put it back. Schedule the node.
1528  Sequence.reserve(SUnits.size());
1529  while (!AvailableQueue->empty() || !Interferences.empty()) {
1530  DEBUG(dbgs() << "\nExamining Available:\n";
1531  AvailableQueue->dump(this));
1532 
1533  // Pick the best node to schedule taking all constraints into
1534  // consideration.
1535  SUnit *SU = PickNodeToScheduleBottomUp();
1536 
1537  AdvancePastStalls(SU);
1538 
1539  ScheduleNodeBottomUp(SU);
1540 
1541  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1542  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1543  assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1544  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1545  }
1546  }
1547 
1548  // Reverse the order if it is bottom up.
1549  std::reverse(Sequence.begin(), Sequence.end());
1550 
1551 #ifndef NDEBUG
1552  VerifyScheduledSequence(/*isBottomUp=*/true);
1553 #endif
1554 }
1555 
1556 //===----------------------------------------------------------------------===//
1557 // RegReductionPriorityQueue Definition
1558 //===----------------------------------------------------------------------===//
1559 //
1560 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1561 // to reduce register pressure.
1562 //
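// (A node's Sethi-Ullman number estimates how many registers are needed to
// evaluate the expression tree rooted at that node.)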
1563 namespace {
1564 class RegReductionPQBase;
1565 
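// queue_sort - Common base for the priority comparators below; by default
// every node is considered ready.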
1566 struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1567  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1568 };
1569 
1570 #ifndef NDEBUG
1571 template<class SF>
1572 struct reverse_sort : public queue_sort {
1573  SF &SortFunc;
1574  reverse_sort(SF &sf) : SortFunc(sf) {}
1575 
1576  bool operator()(SUnit* left, SUnit* right) const {
1577  // reverse left/right rather than simply !SortFunc(left, right)
1578  // to expose different paths in the comparison logic.
1579  return SortFunc(right, left);
1580  }
1581 };
1582 #endif // NDEBUG
1583 
1584 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1585 // reduction scheduler.
1586 struct bu_ls_rr_sort : public queue_sort {
1587  enum {
1588  IsBottomUp = true,
1589  HasReadyFilter = false
1590  };
1591 
1592  RegReductionPQBase *SPQ;
1593  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1594 
1595  bool operator()(SUnit* left, SUnit* right) const;
1596 };
1597 
1598 // src_ls_rr_sort - Priority function for source order scheduler.
1599 struct src_ls_rr_sort : public queue_sort {
1600  enum {
1601  IsBottomUp = true,
1602  HasReadyFilter = false
1603  };
1604 
1605  RegReductionPQBase *SPQ;
1606  src_ls_rr_sort(RegReductionPQBase *spq)
1607  : SPQ(spq) {}
1608 
1609  bool operator()(SUnit* left, SUnit* right) const;
1610 };
1611 
1612 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1613 struct hybrid_ls_rr_sort : public queue_sort {
1614  enum {
1615  IsBottomUp = true,
1616  HasReadyFilter = false
1617  };
1618 
1619  RegReductionPQBase *SPQ;
1620  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1621  : SPQ(spq) {}
1622 
1623  bool isReady(SUnit *SU, unsigned CurCycle) const;
1624 
1625  bool operator()(SUnit* left, SUnit* right) const;
1626 };
1627 
1628 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1629 // scheduler.
1630 struct ilp_ls_rr_sort : public queue_sort {
1631  enum {
1632  IsBottomUp = true,
1633  HasReadyFilter = false
1634  };
1635 
1636  RegReductionPQBase *SPQ;
1637  ilp_ls_rr_sort(RegReductionPQBase *spq)
1638  : SPQ(spq) {}
1639 
1640  bool isReady(SUnit *SU, unsigned CurCycle) const;
1641 
1642  bool operator()(SUnit* left, SUnit* right) const;
1643 };
1644 
1645 class RegReductionPQBase : public SchedulingPriorityQueue {
1646 protected:
1647  std::vector<SUnit*> Queue;
1648  unsigned CurQueueId;
1649  bool TracksRegPressure;
1650  bool SrcOrder;
1651 
1652  // SUnits - The SUnits for the current graph.
1653  std::vector<SUnit> *SUnits;
1654 
1655  MachineFunction &MF;
1656  const TargetInstrInfo *TII;
1657  const TargetRegisterInfo *TRI;
1658  const TargetLowering *TLI;
1659  ScheduleDAGRRList *scheduleDAG;
1660 
1661  // SethiUllmanNumbers - The SethiUllman number for each node.
1662  std::vector<unsigned> SethiUllmanNumbers;
1663 
1664  /// RegPressure - Tracking current reg pressure per register class.
1665  ///
1666  std::vector<unsigned> RegPressure;
1667 
1668  /// RegLimit - Tracking the number of allocatable registers per register
1669  /// class.
1670  std::vector<unsigned> RegLimit;
1671 
1672 public:
1673  RegReductionPQBase(MachineFunction &mf,
1674  bool hasReadyFilter,
1675  bool tracksrp,
1676  bool srcorder,
1677  const TargetInstrInfo *tii,
1678  const TargetRegisterInfo *tri,
1679  const TargetLowering *tli)
1680  : SchedulingPriorityQueue(hasReadyFilter),
1681  CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1682  MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
1683  if (TracksRegPressure) {
1684  unsigned NumRC = TRI->getNumRegClasses();
1685  RegLimit.resize(NumRC);
1686  RegPressure.resize(NumRC);
1687  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1688  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1689  for (const TargetRegisterClass *RC : TRI->regclasses())
1690  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1691  }
1692  }
1693 
1694  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1695  scheduleDAG = scheduleDag;
1696  }
1697 
1698  ScheduleHazardRecognizer* getHazardRec() {
1699  return scheduleDAG->getHazardRec();
1700  }
1701 
1702  void initNodes(std::vector<SUnit> &sunits) override;
1703 
1704  void addNode(const SUnit *SU) override;
1705 
1706  void updateNode(const SUnit *SU) override;
1707 
1708  void releaseState() override {
1709  SUnits = nullptr;
1710  SethiUllmanNumbers.clear();
1711  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1712  }
1713 
1714  unsigned getNodePriority(const SUnit *SU) const;
1715 
1716  unsigned getNodeOrdering(const SUnit *SU) const {
1717  if (!SU->getNode()) return 0;
1718 
1719  return SU->getNode()->getIROrder();
1720  }
1721 
1722  bool empty() const override { return Queue.empty(); }
1723 
1724  void push(SUnit *U) override {
1725  assert(!U->NodeQueueId && "Node in the queue already");
1726  U->NodeQueueId = ++CurQueueId;
1727  Queue.push_back(U);
1728  }
1729 
1730  void remove(SUnit *SU) override {
1731  assert(!Queue.empty() && "Queue is empty!");
1732  assert(SU->NodeQueueId != 0 && "Not in queue!");
1733  std::vector<SUnit *>::iterator I = find(Queue, SU);
1734  if (I != std::prev(Queue.end()))
1735  std::swap(*I, Queue.back());
1736  Queue.pop_back();
1737  SU->NodeQueueId = 0;
1738  }
1739 
1740  bool tracksRegPressure() const override { return TracksRegPressure; }
1741 
1742  void dumpRegPressure() const;
1743 
1744  bool HighRegPressure(const SUnit *SU) const;
1745 
1746  bool MayReduceRegPressure(SUnit *SU) const;
1747 
1748  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1749 
1750  void scheduledNode(SUnit *SU) override;
1751 
1752  void unscheduledNode(SUnit *SU) override;
1753 
1754 protected:
1755  bool canClobber(const SUnit *SU, const SUnit *Op);
1756  void AddPseudoTwoAddrDeps();
1757  void PrescheduleNodesWithMultipleUses();
1758  void CalculateSethiUllmanNumbers();
1759 };
1760 
1761 template<class SF>
1762 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1763  std::vector<SUnit *>::iterator Best = Q.begin();
1764  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1765  if (Picker(*Best, *I))
1766  Best = I;
1767  SUnit *V = *Best;
1768  if (Best != std::prev(Q.end()))
1769  std::swap(*Best, Q.back());
1770  Q.pop_back();
1771  return V;
1772 }
1773 
1774 template<class SF>
1775 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1776 #ifndef NDEBUG
1777  if (DAG->StressSched) {
1778  reverse_sort<SF> RPicker(Picker);
1779  return popFromQueueImpl(Q, RPicker);
1780  }
1781 #endif
1782  (void)DAG;
1783  return popFromQueueImpl(Q, Picker);
1784 }
1785 
1786 template<class SF>
1787 class RegReductionPriorityQueue : public RegReductionPQBase {
1788  SF Picker;
1789 
1790 public:
1791  RegReductionPriorityQueue(MachineFunction &mf,
1792  bool tracksrp,
1793  bool srcorder,
1794  const TargetInstrInfo *tii,
1795  const TargetRegisterInfo *tri,
1796  const TargetLowering *tli)
1797  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1798  tii, tri, tli),
1799  Picker(this) {}
1800 
1801  bool isBottomUp() const override { return SF::IsBottomUp; }
1802 
1803  bool isReady(SUnit *U) const override {
1804  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1805  }
1806 
1807  SUnit *pop() override {
1808  if (Queue.empty()) return nullptr;
1809 
1810  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1811  V->NodeQueueId = 0;
1812  return V;
1813  }
1814 
1815 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1816  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1817  // Emulate pop() without clobbering NodeQueueIds.
1818  std::vector<SUnit*> DumpQueue = Queue;
1819  SF DumpPicker = Picker;
1820  while (!DumpQueue.empty()) {
1821  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1822  dbgs() << "Height " << SU->getHeight() << ": ";
1823  SU->dump(DAG);
1824  }
1825  }
1826 #endif
1827 };
1828 
1829 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1830 BURegReductionPriorityQueue;
1831 
1832 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1833 SrcRegReductionPriorityQueue;
1834 
1835 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1836 HybridBURRPriorityQueue;
1837 
1838 typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1839 ILPBURRPriorityQueue;
1840 } // end anonymous namespace
1841 
1842 //===----------------------------------------------------------------------===//
1843 // Static Node Priority for Register Pressure Reduction
1844 //===----------------------------------------------------------------------===//
1845 
1846 // Check for special nodes that bypass scheduling heuristics.
1847 // Currently this pushes TokenFactor nodes down, but may be used for other
1848 // pseudo-ops as well.
1849 //
1850 // Return -1 to schedule right above left, 1 for left above right.
1851 // Return 0 if no bias exists.
1852 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1853  bool LSchedLow = left->isScheduleLow;
1854  bool RSchedLow = right->isScheduleLow;
1855  if (LSchedLow != RSchedLow)
1856  return LSchedLow < RSchedLow ? 1 : -1;
1857  return 0;
1858 }
1859 
1860 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1861 /// Smaller number is the higher priority.
1862 static unsigned
1863 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1864  if (SUNumbers[SU->NodeNum] != 0)
1865  return SUNumbers[SU->NodeNum];
1866 
1867  // Use WorkList to avoid stack overflow on excessively large IRs.
1868  struct WorkState {
1869  WorkState(const SUnit *SU) : SU(SU) {}
1870  const SUnit *SU;
1871  unsigned PredsProcessed = 0;
1872  };
1873 
1874  SmallVector<WorkState, 16> WorkList;
1875  WorkList.push_back(SU);
1876  while (!WorkList.empty()) {
1877  auto &Temp = WorkList.back();
1878  auto *TempSU = Temp.SU;
1879  bool AllPredsKnown = true;
1880  // Try to find a non-evaluated pred and push it into the processing stack.
1881  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1882  auto &Pred = TempSU->Preds[P];
1883  if (Pred.isCtrl()) continue; // ignore chain preds
1884  SUnit *PredSU = Pred.getSUnit();
1885  if (SUNumbers[PredSU->NodeNum] == 0) {
1886 #ifndef NDEBUG
1887  // In debug mode, check that we don't have such an element in the stack.
1888  for (auto It : WorkList)
1889  assert(It.SU != PredSU && "Trying to push an element twice?");
1890 #endif
1891  // Next time, start processing this one from the next pred.
1892  Temp.PredsProcessed = P + 1;
1893  WorkList.push_back(PredSU);
1894  AllPredsKnown = false;
1895  break;
1896  }
1897  }
1898 
1899  if (!AllPredsKnown)
1900  continue;
1901 
1902  // Once all preds are known, we can calculate the answer for this one.
1903  unsigned SethiUllmanNumber = 0;
1904  unsigned Extra = 0;
1905  for (const SDep &Pred : TempSU->Preds) {
1906  if (Pred.isCtrl()) continue; // ignore chain preds
1907  SUnit *PredSU = Pred.getSUnit();
1908  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1909  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1910  if (PredSethiUllman > SethiUllmanNumber) {
1911  SethiUllmanNumber = PredSethiUllman;
1912  Extra = 0;
1913  } else if (PredSethiUllman == SethiUllmanNumber)
1914  ++Extra;
1915  }
1916 
1917  SethiUllmanNumber += Extra;
1918  if (SethiUllmanNumber == 0)
1919  SethiUllmanNumber = 1;
1920  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1921  WorkList.pop_back();
1922  }
1923 
1924  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1925  return SUNumbers[SU->NodeNum];
1926 }
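// Illustrative example of the computation above: a leaf SUnit with no data
// preds gets a Sethi-Ullman number of 1. A node whose two data preds both
// have number 1 gets max(1,1) plus one extra for the tie, i.e. 2, while a
// node with preds numbered 2 and 1 simply inherits the max, i.e. 2. The
// bottom-up comparators below (see BURRSort) prefer smaller numbers.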
1927 
1928 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1929 /// scheduling units.
1930 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1931  SethiUllmanNumbers.assign(SUnits->size(), 0);
1932 
1933  for (const SUnit &SU : *SUnits)
1934  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1935 }
1936 
1937 void RegReductionPQBase::addNode(const SUnit *SU) {
1938  unsigned SUSize = SethiUllmanNumbers.size();
1939  if (SUnits->size() > SUSize)
1940  SethiUllmanNumbers.resize(SUSize*2, 0);
1941  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1942 }
1943 
1944 void RegReductionPQBase::updateNode(const SUnit *SU) {
1945  SethiUllmanNumbers[SU->NodeNum] = 0;
1946  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1947 }
1948 
1949 // Lower priority means schedule further down. For bottom-up scheduling, lower
1950 // priority SUs are scheduled before higher priority SUs.
1951 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1952  assert(SU->NodeNum < SethiUllmanNumbers.size());
1953  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1954  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1955  // CopyToReg should be close to its uses to facilitate coalescing and
1956  // avoid spilling.
1957  return 0;
1958  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1959  Opc == TargetOpcode::SUBREG_TO_REG ||
1960  Opc == TargetOpcode::INSERT_SUBREG)
1961  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1962  // close to their uses to facilitate coalescing.
1963  return 0;
1964  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1965  // If SU does not have a register use, i.e. it doesn't produce a value
1966  // that would be consumed (e.g. store), then it terminates a chain of
1967  // computation. Give it a large SethiUllman number so it will be
1968  // scheduled right before its predecessors so that it doesn't lengthen
1969  // their live ranges.
1970  return 0xffff;
1971  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1972  // If SU does not have a register def, schedule it close to its uses
1973  // because it does not lengthen any live ranges.
1974  return 0;
1975 #if 1
1976  return SethiUllmanNumbers[SU->NodeNum];
1977 #else
1978  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1979  if (SU->isCallOp) {
1980  // FIXME: This assumes all of the defs are used as call operands.
1981  int NP = (int)Priority - SU->getNode()->getNumValues();
1982  return (NP > 0) ? NP : 0;
1983  }
1984  return Priority;
1985 #endif
1986 }
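// Summary of the tiers above: 0 for nodes that should stay next to their
// uses (TokenFactor, CopyToReg, the subreg pseudo-ops, and nodes with data
// succs but no data preds), 0xffff for nodes with data preds but no data
// succs (e.g. stores), and the Sethi-Ullman number otherwise. BURRSort
// below prefers the candidate with the smaller value.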
1987 
1988 //===----------------------------------------------------------------------===//
1989 // Register Pressure Tracking
1990 //===----------------------------------------------------------------------===//
1991 
1992 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1993 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
1994  for (const TargetRegisterClass *RC : TRI->regclasses()) {
1995  unsigned Id = RC->getID();
1996  unsigned RP = RegPressure[Id];
1997  if (!RP) continue;
1998  DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
1999  << RegLimit[Id] << '\n');
2000  }
2001 }
2002 #endif
2003 
2004 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2005  if (!TLI)
2006  return false;
2007 
2008  for (const SDep &Pred : SU->Preds) {
2009  if (Pred.isCtrl())
2010  continue;
2011  SUnit *PredSU = Pred.getSUnit();
2012  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2013  // to cover the number of registers defined (they are all live).
2014  if (PredSU->NumRegDefsLeft == 0) {
2015  continue;
2016  }
2017  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2018  RegDefPos.IsValid(); RegDefPos.Advance()) {
2019  unsigned RCId, Cost;
2020  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2021 
2022  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2023  return true;
2024  }
2025  }
2026  return false;
2027 }
2028 
2029 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2030  const SDNode *N = SU->getNode();
2031 
2032  if (!N->isMachineOpcode() || !SU->NumSuccs)
2033  return false;
2034 
2035  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2036  for (unsigned i = 0; i != NumDefs; ++i) {
2037  MVT VT = N->getSimpleValueType(i);
2038  if (!N->hasAnyUseOfValue(i))
2039  continue;
2040  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2041  if (RegPressure[RCId] >= RegLimit[RCId])
2042  return true;
2043  }
2044  return false;
2045 }
2046 
2047 // Compute the register pressure contribution by this instruction by counting up
2048 // for uses that are not live and down for defs. Only count register classes
2049 // that are already under high pressure. As a side effect, compute the number of
2050 // uses of registers that are already live.
2051 //
2052 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2053 // so could probably be factored.
2054 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2055  LiveUses = 0;
2056  int PDiff = 0;
2057  for (const SDep &Pred : SU->Preds) {
2058  if (Pred.isCtrl())
2059  continue;
2060  SUnit *PredSU = Pred.getSUnit();
2061  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2062  // to cover the number of registers defined (they are all live).
2063  if (PredSU->NumRegDefsLeft == 0) {
2064  if (PredSU->getNode()->isMachineOpcode())
2065  ++LiveUses;
2066  continue;
2067  }
2068  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2069  RegDefPos.IsValid(); RegDefPos.Advance()) {
2070  MVT VT = RegDefPos.GetValue();
2071  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2072  if (RegPressure[RCId] >= RegLimit[RCId])
2073  ++PDiff;
2074  }
2075  }
2076  const SDNode *N = SU->getNode();
2077 
2078  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2079  return PDiff;
2080 
2081  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2082  for (unsigned i = 0; i != NumDefs; ++i) {
2083  MVT VT = N->getSimpleValueType(i);
2084  if (!N->hasAnyUseOfValue(i))
2085  continue;
2086  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2087  if (RegPressure[RCId] >= RegLimit[RCId])
2088  --PDiff;
2089  }
2090  return PDiff;
2091 }
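// Reading of the result above (illustrative): a positive PDiff means that
// scheduling SU adds more newly live uses than it removes live defs, counted
// only in register classes already at or over their limit; a negative PDiff
// means the node is a net win for those classes. LiveUses separately counts
// data operands whose defining node's register defs are already all live.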
2092 
2093 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2094  if (!TracksRegPressure)
2095  return;
2096 
2097  if (!SU->getNode())
2098  return;
2099 
2100  for (const SDep &Pred : SU->Preds) {
2101  if (Pred.isCtrl())
2102  continue;
2103  SUnit *PredSU = Pred.getSUnit();
2104  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2105  // to cover the number of registers defined (they are all live).
2106  if (PredSU->NumRegDefsLeft == 0) {
2107  continue;
2108  }
2109  // FIXME: The ScheduleDAG currently loses information about which of a
2110  // node's values is consumed by each dependence. Consequently, if the node
2111  // defines multiple register classes, we don't know which to pressurize
2112  // here. Instead the following loop consumes the register defs in an
2113  // arbitrary order. At least it handles the common case of clustered loads
2114  // to the same class. For precise liveness, each SDep needs to indicate the
2115  // result number. But that tightly couples the ScheduleDAG with the
2116  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2117  // value type or register class to SDep.
2118  //
2119  // The most important aspect of register tracking is balancing the increase
2120  // here with the reduction further below. Note that this SU may use multiple
2121  // defs in PredSU. They can't be determined here, but we've already
2122  // compensated by reducing NumRegDefsLeft in PredSU during
2123  // ScheduleDAGSDNodes::AddSchedEdges.
2124  --PredSU->NumRegDefsLeft;
2125  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2126  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2127  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2128  if (SkipRegDefs)
2129  continue;
2130 
2131  unsigned RCId, Cost;
2132  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2133  RegPressure[RCId] += Cost;
2134  break;
2135  }
2136  }
2137 
2138  // We should have this assert, but there may be dead SDNodes that never
2139  // materialize as SUnits, so they don't appear to generate liveness.
2140  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2141  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2142  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2143  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2144  if (SkipRegDefs > 0)
2145  continue;
2146  unsigned RCId, Cost;
2147  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2148  if (RegPressure[RCId] < Cost) {
2149  // Register pressure tracking is imprecise. This can happen. But we try
2150  // hard not to let it happen because it likely results in poor scheduling.
2151  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
2152  RegPressure[RCId] = 0;
2153  }
2154  else {
2155  RegPressure[RCId] -= Cost;
2156  }
2157  }
2158  DEBUG(dumpRegPressure());
2159 }
2160 
2161 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2162  if (!TracksRegPressure)
2163  return;
2164 
2165  const SDNode *N = SU->getNode();
2166  if (!N) return;
2167 
2168  if (!N->isMachineOpcode()) {
2169  if (N->getOpcode() != ISD::CopyToReg)
2170  return;
2171  } else {
2172  unsigned Opc = N->getMachineOpcode();
2173  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2174  Opc == TargetOpcode::INSERT_SUBREG ||
2175  Opc == TargetOpcode::SUBREG_TO_REG ||
2176  Opc == TargetOpcode::REG_SEQUENCE ||
2177  Opc == TargetOpcode::IMPLICIT_DEF)
2178  return;
2179  }
2180 
2181  for (const SDep &Pred : SU->Preds) {
2182  if (Pred.isCtrl())
2183  continue;
2184  SUnit *PredSU = Pred.getSUnit();
2185  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2186  // counts data deps.
2187  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2188  continue;
2189  const SDNode *PN = PredSU->getNode();
2190  if (!PN->isMachineOpcode()) {
2191  if (PN->getOpcode() == ISD::CopyFromReg) {
2192  MVT VT = PN->getSimpleValueType(0);
2193  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2194  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2195  }
2196  continue;
2197  }
2198  unsigned POpc = PN->getMachineOpcode();
2199  if (POpc == TargetOpcode::IMPLICIT_DEF)
2200  continue;
2201  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2202  POpc == TargetOpcode::INSERT_SUBREG ||
2203  POpc == TargetOpcode::SUBREG_TO_REG) {
2204  MVT VT = PN->getSimpleValueType(0);
2205  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2206  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2207  continue;
2208  }
2209  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2210  for (unsigned i = 0; i != NumDefs; ++i) {
2211  MVT VT = PN->getSimpleValueType(i);
2212  if (!PN->hasAnyUseOfValue(i))
2213  continue;
2214  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2215  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2216  // Register pressure tracking is imprecise. This can happen.
2217  RegPressure[RCId] = 0;
2218  else
2219  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2220  }
2221  }
2222 
2223  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2224  // may transfer data dependencies to CopyToReg.
2225  if (SU->NumSuccs && N->isMachineOpcode()) {
2226  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2227  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2228  MVT VT = N->getSimpleValueType(i);
2229  if (VT == MVT::Glue || VT == MVT::Other)
2230  continue;
2231  if (!N->hasAnyUseOfValue(i))
2232  continue;
2233  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2234  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2235  }
2236  }
2237 
2238  DEBUG(dumpRegPressure());
2239 }
2240 
2241 //===----------------------------------------------------------------------===//
2242 // Dynamic Node Priority for Register Pressure Reduction
2243 //===----------------------------------------------------------------------===//
2244 
2245 /// closestSucc - Returns the scheduled cycle of the successor which is
2246 /// closest to the current cycle.
2247 static unsigned closestSucc(const SUnit *SU) {
2248  unsigned MaxHeight = 0;
2249  for (const SDep &Succ : SU->Succs) {
2250  if (Succ.isCtrl()) continue; // ignore chain succs
2251  unsigned Height = Succ.getSUnit()->getHeight();
2252  // If there are a bunch of CopyToRegs stacked up, they should be considered
2253  // to be at the same position.
2254  if (Succ.getSUnit()->getNode() &&
2255  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2256  Height = closestSucc(Succ.getSUnit())+1;
2257  if (Height > MaxHeight)
2258  MaxHeight = Height;
2259  }
2260  return MaxHeight;
2261 }
2262 
2263 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2264 /// for scratch registers, i.e. number of data dependencies.
2265 static unsigned calcMaxScratches(const SUnit *SU) {
2266  unsigned Scratches = 0;
2267  for (const SDep &Pred : SU->Preds) {
2268  if (Pred.isCtrl()) continue; // ignore chain preds
2269  Scratches++;
2270  }
2271  return Scratches;
2272 }
2273 
2274 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2275 /// CopyFromReg from a virtual register.
2276 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2277  bool RetVal = false;
2278  for (const SDep &Pred : SU->Preds) {
2279  if (Pred.isCtrl()) continue;
2280  const SUnit *PredSU = Pred.getSUnit();
2281  if (PredSU->getNode() &&
2282  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2283  unsigned Reg =
2284  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2285  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2286  RetVal = true;
2287  continue;
2288  }
2289  }
2290  return false;
2291  }
2292  return RetVal;
2293 }
2294 
2295 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2296 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2297 /// it has no other use. It should be scheduled closer to the terminator.
2298 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2299  bool RetVal = false;
2300  for (const SDep &Succ : SU->Succs) {
2301  if (Succ.isCtrl()) continue;
2302  const SUnit *SuccSU = Succ.getSUnit();
2303  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2304  unsigned Reg =
2305  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2306  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2307  RetVal = true;
2308  continue;
2309  }
2310  }
2311  return false;
2312  }
2313  return RetVal;
2314 }
2315 
2316 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2317 // set isVRegCycle for its CopyFromReg operands.
2318 //
2319 // This is only relevant for single-block loops, in which case the VRegCycle
2320 // node is likely an induction variable in which the operand and target virtual
2321 // registers should be coalesced (e.g. pre/post increment values). Setting the
2322 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2323 // CopyFromReg so that this node becomes the virtual register "kill". This
2324 // avoids interference between the values live in and out of the block and
2325 // eliminates a copy inside the loop.
2326 static void initVRegCycle(SUnit *SU) {
2327  if (DisableSchedVRegCycle)
2328  return;
2329 
2330  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2331  return;
2332 
2333  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2334 
2335  SU->isVRegCycle = true;
2336 
2337  for (const SDep &Pred : SU->Preds) {
2338  if (Pred.isCtrl()) continue;
2339  Pred.getSUnit()->isVRegCycle = true;
2340  }
2341 }
2342 
2343 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2344 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2345 static void resetVRegCycle(SUnit *SU) {
2346  if (!SU->isVRegCycle)
2347  return;
2348 
2349  for (const SDep &Pred : SU->Preds) {
2350  if (Pred.isCtrl()) continue; // ignore chain preds
2351  SUnit *PredSU = Pred.getSUnit();
2352  if (PredSU->isVRegCycle) {
2353  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2354  "VRegCycle def must be CopyFromReg");
2355  Pred.getSUnit()->isVRegCycle = false;
2356  }
2357  }
2358 }
2359 
2360 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2361 // means a node that defines the VRegCycle has not been scheduled yet.
2362 static bool hasVRegCycleUse(const SUnit *SU) {
2363  // If this SU also defines the VReg, don't hoist it as a "use".
2364  if (SU->isVRegCycle)
2365  return false;
2366 
2367  for (const SDep &Pred : SU->Preds) {
2368  if (Pred.isCtrl()) continue; // ignore chain preds
2369  if (Pred.getSUnit()->isVRegCycle &&
2370  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2371  DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2372  return true;
2373  }
2374  }
2375  return false;
2376 }
2377 
2378 // Check for either a dependence (latency) or resource (hazard) stall.
2379 //
2380 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2381 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2382  if ((int)SPQ->getCurCycle() < Height) return true;
2383  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2384  != ScheduleHazardRecognizer::NoHazard)
2385  return true;
2386  return false;
2387 }
2388 
2389 // Return -1 if left has higher priority, 1 if right has higher priority.
2390 // Return 0 if latency-based priority is equivalent.
2391 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2392  RegReductionPQBase *SPQ) {
2393  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2394  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2395  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2396  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2397  int LHeight = (int)left->getHeight() + LPenalty;
2398  int RHeight = (int)right->getHeight() + RPenalty;
2399 
2400  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2401  BUHasStall(left, LHeight, SPQ);
2402  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2403  BUHasStall(right, RHeight, SPQ);
2404 
2405  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2406  // If scheduling both would cause a pipeline stall, sort
2407  // them according to their height.
2408  if (LStall) {
2409  if (!RStall)
2410  return 1;
2411  if (LHeight != RHeight)
2412  return LHeight > RHeight ? 1 : -1;
2413  } else if (RStall)
2414  return -1;
2415 
2416  // If either node is scheduling for latency, sort them by height/depth
2417  // and latency.
2418  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2419  right->SchedulingPref == Sched::ILP)) {
2420  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2421  // is enabled, grouping instructions by cycle, then its height is already
2422  // covered so only its depth matters. We also reach this point if both stall
2423  // but have the same height.
2424  if (!SPQ->getHazardRec()->isEnabled()) {
2425  if (LHeight != RHeight)
2426  return LHeight > RHeight ? 1 : -1;
2427  }
2428  int LDepth = left->getDepth() - LPenalty;
2429  int RDepth = right->getDepth() - RPenalty;
2430  if (LDepth != RDepth) {
2431  DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2432  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2433  << ") depth " << RDepth << "\n");
2434  return LDepth < RDepth ? 1 : -1;
2435  }
2436  if (left->Latency != right->Latency)
2437  return left->Latency > right->Latency ? 1 : -1;
2438  }
2439  return 0;
2440 }
2441 
2442 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2443  // Schedule physical register definitions close to their use. This is
2444  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2445  // long as shortening physreg live ranges is generally good, we can defer
2446  // creating a subtarget hook.
2447  if (!DisableSchedPhysRegJoin) {
2448  bool LHasPhysReg = left->hasPhysRegDefs;
2449  bool RHasPhysReg = right->hasPhysRegDefs;
2450  if (LHasPhysReg != RHasPhysReg) {
2451  #ifndef NDEBUG
2452  static const char *const PhysRegMsg[] = { " has no physreg",
2453  " defines a physreg" };
2454  #endif
2455  DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2456  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2457  << PhysRegMsg[RHasPhysReg] << "\n");
2458  return LHasPhysReg < RHasPhysReg;
2459  }
2460  }
2461 
2462  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2463  unsigned LPriority = SPQ->getNodePriority(left);
2464  unsigned RPriority = SPQ->getNodePriority(right);
2465 
2466  // Be really careful about hoisting call operands above previous calls.
2467  // Only allow it if it would reduce register pressure.
2468  if (left->isCall && right->isCallOp) {
2469  unsigned RNumVals = right->getNode()->getNumValues();
2470  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2471  }
2472  if (right->isCall && left->isCallOp) {
2473  unsigned LNumVals = left->getNode()->getNumValues();
2474  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2475  }
2476 
2477  if (LPriority != RPriority)
2478  return LPriority > RPriority;
2479 
2480  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2481  // the same, keep source order.
2482  if (left->isCall || right->isCall) {
2483  unsigned LOrder = SPQ->getNodeOrdering(left);
2484  unsigned ROrder = SPQ->getNodeOrdering(right);
2485 
2486  // Prefer an ordering where the lower the non-zero order number, the higher
2487  // the preference.
2488  if ((LOrder || ROrder) && LOrder != ROrder)
2489  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2490  }
2491 
2492  // Try to schedule def + use closer together when Sethi-Ullman numbers are the same.
2493  // e.g.
2494  // t1 = op t2, c1
2495  // t3 = op t4, c2
2496  //
2497  // and the following instructions are both ready.
2498  // t2 = op c3
2499  // t4 = op c4
2500  //
2501  // Then schedule t2 = op first.
2502  // i.e.
2503  // t4 = op c4
2504  // t2 = op c3
2505  // t1 = op t2, c1
2506  // t3 = op t4, c2
2507  //
2508  // This creates more short live intervals.
2509  unsigned LDist = closestSucc(left);
2510  unsigned RDist = closestSucc(right);
2511  if (LDist != RDist)
2512  return LDist < RDist;
2513 
2514  // How many registers become live when the node is scheduled.
2515  unsigned LScratch = calcMaxScratches(left);
2516  unsigned RScratch = calcMaxScratches(right);
2517  if (LScratch != RScratch)
2518  return LScratch > RScratch;
2519 
2520  // Comparing latency against a call makes little sense unless the node
2521  // is register pressure-neutral.
2522  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2523  return (left->NodeQueueId > right->NodeQueueId);
2524 
2525  // Do not compare latencies when one or both of the nodes are calls.
2526  if (!DisableSchedCycles &&
2527  !(left->isCall || right->isCall)) {
2528  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2529  if (result != 0)
2530  return result > 0;
2531  }
2532  else {
2533  if (left->getHeight() != right->getHeight())
2534  return left->getHeight() > right->getHeight();
2535 
2536  if (left->getDepth() != right->getDepth())
2537  return left->getDepth() < right->getDepth();
2538  }
2539 
2540  assert(left->NodeQueueId && right->NodeQueueId &&
2541  "NodeQueueId cannot be zero");
2542  return (left->NodeQueueId > right->NodeQueueId);
2543 }
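// In summary, BURRSort falls through the following tie-breakers, in order:
// physreg defs (kept close to their uses unless disabled), Sethi-Ullman
// priority (discounted for call operands), source order when calls are
// involved, distance to the closest successor, the scratch-register
// estimate, then latency (or raw height/depth when cycle-level scheduling
// is off or a call is involved), and finally NodeQueueId to keep the order
// deterministic.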
2544 
2545 // Bottom up
2546 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2547  if (int res = checkSpecialNodes(left, right))
2548  return res > 0;
2549 
2550  return BURRSort(left, right, SPQ);
2551 }
2552 
2553 // Source order, otherwise bottom up.
2554 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2555  if (int res = checkSpecialNodes(left, right))
2556  return res > 0;
2557 
2558  unsigned LOrder = SPQ->getNodeOrdering(left);
2559  unsigned ROrder = SPQ->getNodeOrdering(right);
2560 
2561  // Prefer an ordering where the lower the non-zero order number, the higher
2562  // the preference.
2563  if ((LOrder || ROrder) && LOrder != ROrder)
2564  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2565 
2566  return BURRSort(left, right, SPQ);
2567 }
2568 
2569 // If the time between now and when the instruction will be ready can cover
2570 // the spill code, then avoid adding it to the ready queue. This gives long
2571 // stalls highest priority and allows hoisting across calls. It should also
2572 // speed up processing the available queue.
2573 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2574  static const unsigned ReadyDelay = 3;
2575 
2576  if (SPQ->MayReduceRegPressure(SU)) return true;
2577 
2578  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2579 
2580  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2581  != ScheduleHazardRecognizer::NoHazard)
2582  return false;
2583 
2584  return true;
2585 }
2586 
2587 // Return true if right should be scheduled with higher priority than left.
2588 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2589  if (int res = checkSpecialNodes(left, right))
2590  return res > 0;
2591 
2592  if (left->isCall || right->isCall)
2593  // No way to compute latency of calls.
2594  return BURRSort(left, right, SPQ);
2595 
2596  bool LHigh = SPQ->HighRegPressure(left);
2597  bool RHigh = SPQ->HighRegPressure(right);
2598  // Avoid causing spills. If register pressure is high, schedule for
2599  // register pressure reduction.
2600  if (LHigh && !RHigh) {
2601  DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2602  << right->NodeNum << ")\n");
2603  return true;
2604  }
2605  else if (!LHigh && RHigh) {
2606  DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2607  << left->NodeNum << ")\n");
2608  return false;
2609  }
2610  if (!LHigh && !RHigh) {
2611  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2612  if (result != 0)
2613  return result > 0;
2614  }
2615  return BURRSort(left, right, SPQ);
2616 }
2617 
2618 // Schedule as many instructions in each cycle as possible. So don't make an
2619 // instruction available unless it is ready in the current cycle.
2620 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2621  if (SU->getHeight() > CurCycle) return false;
2622 
2623  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2624  != ScheduleHazardRecognizer::NoHazard)
2625  return false;
2626 
2627  return true;
2628 }
2629 
2630 static bool canEnableCoalescing(SUnit *SU) {
2631  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2632  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2633  // CopyToReg should be close to its uses to facilitate coalescing and
2634  // avoid spilling.
2635  return true;
2636 
2637  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2638  Opc == TargetOpcode::SUBREG_TO_REG ||
2639  Opc == TargetOpcode::INSERT_SUBREG)
2640  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2641  // close to their uses to facilitate coalescing.
2642  return true;
2643 
2644  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2645  // If SU does not have a register def, schedule it close to its uses
2646  // because it does not lengthen any live ranges.
2647  return true;
2648 
2649  return false;
2650 }
2651 
2652 // list-ilp is currently an experimental scheduler that allows various
2653 // heuristics to be enabled prior to the normal register reduction logic.
2654 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2655  if (int res = checkSpecialNodes(left, right))
2656  return res > 0;
2657 
2658  if (left->isCall || right->isCall)
2659  // No way to compute latency of calls.
2660  return BURRSort(left, right, SPQ);
2661 
2662  unsigned LLiveUses = 0, RLiveUses = 0;
2663  int LPDiff = 0, RPDiff = 0;
2664  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2665  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2666  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2667  }
2668  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2669  DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2670  << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2671  return LPDiff > RPDiff;
2672  }
2673 
2674  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2675  bool LReduce = canEnableCoalescing(left);
2676  bool RReduce = canEnableCoalescing(right);
2677  if (LReduce && !RReduce) return false;
2678  if (RReduce && !LReduce) return true;
2679  }
2680 
2681  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2682  DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2683  << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2684  return LLiveUses < RLiveUses;
2685  }
2686 
2687  if (!DisableSchedStalls) {
2688  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2689  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2690  if (LStall != RStall)
2691  return left->getHeight() > right->getHeight();
2692  }
2693 
2694  if (!DisableSchedCriticalPath) {
2695  int spread = (int)left->getDepth() - (int)right->getDepth();
2696  if (std::abs(spread) > MaxReorderWindow) {
2697  DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2698  << left->getDepth() << " != SU(" << right->NodeNum << "): "
2699  << right->getDepth() << "\n");
2700  return left->getDepth() < right->getDepth();
2701  }
2702  }
2703 
2704  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2705  int spread = (int)left->getHeight() - (int)right->getHeight();
2706  if (std::abs(spread) > MaxReorderWindow)
2707  return left->getHeight() > right->getHeight();
2708  }
2709 
2710  return BURRSort(left, right, SPQ);
2711 }
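// In summary, the experimental list-ilp comparator above consults, in
// order: special nodes, register pressure difference, coalescing
// opportunities when pressure is high, live-use counts, stalls, critical
// path (depth) and height within MaxReorderWindow, and finally the regular
// BURRSort tie-breakers. Most of these heuristics can be switched off
// individually with the -disable-sched-* flags declared at the top of this
// file.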
2712 
2713 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2714  SUnits = &sunits;
2715  // Add pseudo dependency edges for two-address nodes.
2716  if (!Disable2AddrHack)
2717  AddPseudoTwoAddrDeps();
2718  // Reroute edges to nodes with multiple uses.
2719  if (!TracksRegPressure && !SrcOrder)
2720  PrescheduleNodesWithMultipleUses();
2721  // Calculate node priorities.
2722  CalculateSethiUllmanNumbers();
2723 
2724  // For single block loops, mark nodes that look like canonical IV increments.
2725  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2726  for (SUnit &SU : sunits)
2727  initVRegCycle(&SU);
2728 }
2729 
2730 //===----------------------------------------------------------------------===//
2731 // Preschedule for Register Pressure
2732 //===----------------------------------------------------------------------===//
2733 
2734 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2735  if (SU->isTwoAddress) {
2736  unsigned Opc = SU->getNode()->getMachineOpcode();
2737  const MCInstrDesc &MCID = TII->get(Opc);
2738  unsigned NumRes = MCID.getNumDefs();
2739  unsigned NumOps = MCID.getNumOperands() - NumRes;
2740  for (unsigned i = 0; i != NumOps; ++i) {
2741  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2742  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2743  if (DU->getNodeId() != -1 &&
2744  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2745  return true;
2746  }
2747  }
2748  }
2749  return false;
2750 }
2751 
2752 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2753 /// successor's explicit physregs whose definition can reach DepSU.
2754 /// i.e. DepSU should not be scheduled above SU.
2755 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2756  ScheduleDAGRRList *scheduleDAG,
2757  const TargetInstrInfo *TII,
2758  const TargetRegisterInfo *TRI) {
2759  const MCPhysReg *ImpDefs
2760  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2761  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2762  if(!ImpDefs && !RegMask)
2763  return false;
2764 
2765  for (const SDep &Succ : SU->Succs) {
2766  SUnit *SuccSU = Succ.getSUnit();
2767  for (const SDep &SuccPred : SuccSU->Preds) {
2768  if (!SuccPred.isAssignedRegDep())
2769  continue;
2770 
2771  if (RegMask &&
2772  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2773  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2774  return true;
2775 
2776  if (ImpDefs)
2777  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2778  // Return true if SU clobbers this physical register use and the
2779  // definition of the register reaches from DepSU. IsReachable queries
2780  // a topological forward sort of the DAG (following the successors).
2781  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2782  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2783  return true;
2784  }
2785  }
2786  return false;
2787 }
2788 
2789 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2790 /// physical register defs.
2791 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2792  const TargetInstrInfo *TII,
2793  const TargetRegisterInfo *TRI) {
2794  SDNode *N = SuccSU->getNode();
2795  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2796  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2797  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2798  for (const SDNode *SUNode = SU->getNode(); SUNode;
2799  SUNode = SUNode->getGluedNode()) {
2800  if (!SUNode->isMachineOpcode())
2801  continue;
2802  const MCPhysReg *SUImpDefs =
2803  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2804  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2805  if (!SUImpDefs && !SURegMask)
2806  continue;
2807  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2808  MVT VT = N->getSimpleValueType(i);
2809  if (VT == MVT::Glue || VT == MVT::Other)
2810  continue;
2811  if (!N->hasAnyUseOfValue(i))
2812  continue;
2813  unsigned Reg = ImpDefs[i - NumDefs];
2814  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2815  return true;
2816  if (!SUImpDefs)
2817  continue;
2818  for (;*SUImpDefs; ++SUImpDefs) {
2819  unsigned SUReg = *SUImpDefs;
2820  if (TRI->regsOverlap(Reg, SUReg))
2821  return true;
2822  }
2823  }
2824  }
2825  return false;
2826 }
2827 
2828 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2829 /// are not handled well by the general register pressure reduction
2830 /// heuristics. When presented with code like this:
2831 ///
2832 /// N
2833 /// / |
2834 /// / |
2835 /// U store
2836 /// |
2837 /// ...
2838 ///
2839 /// the heuristics tend to push the store up, but since the
2840 /// operand of the store has another use (U), this would increase
2841 /// the length of that other use (the U->N edge).
2842 ///
2843 /// This function transforms code like the above to route U's
2844 /// dependence through the store when possible, like this:
2845 ///
2846 /// N
2847 /// ||
2848 /// ||
2849 /// store
2850 /// |
2851 /// U
2852 /// |
2853 /// ...
2854 ///
2855 /// This results in the store being scheduled immediately
2856 /// after N, which shortens the U->N live range, reducing
2857 /// register pressure.
2858 ///
2859 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2860  // Visit all the nodes in topological order, working top-down.
2861  for (SUnit &SU : *SUnits) {
2862  // For now, only look at nodes with no data successors, such as stores.
2863  // These are especially important, due to the heuristics in
2864  // getNodePriority for nodes with no data successors.
2865  if (SU.NumSuccs != 0)
2866  continue;
2867  // For now, only look at nodes with exactly one data predecessor.
2868  if (SU.NumPreds != 1)
2869  continue;
2870  // Avoid prescheduling copies to virtual registers, which don't behave
2871  // like other nodes from the perspective of scheduling heuristics.
2872  if (SDNode *N = SU.getNode())
2873  if (N->getOpcode() == ISD::CopyToReg &&
2874  TargetRegisterInfo::isVirtualRegister
2875  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2876  continue;
2877 
2878  // Locate the single data predecessor.
2879  SUnit *PredSU = nullptr;
2880  for (const SDep &Pred : SU.Preds)
2881  if (!Pred.isCtrl()) {
2882  PredSU = Pred.getSUnit();
2883  break;
2884  }
2885  assert(PredSU);
2886 
2887  // Don't rewrite edges that carry physregs, because that requires additional
2888  // support infrastructure.
2889  if (PredSU->hasPhysRegDefs)
2890  continue;
2891  // Short-circuit the case where SU is PredSU's only data successor.
2892  if (PredSU->NumSuccs == 1)
2893  continue;
2894  // Avoid prescheduling to copies from virtual registers, which don't behave
2895  // like other nodes from the perspective of scheduling heuristics.
2896  if (SDNode *N = SU.getNode())
2897  if (N->getOpcode() == ISD::CopyFromReg &&
2898  TargetRegisterInfo::isVirtualRegister
2899  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2900  continue;
2901 
2902  // Perform checks on the successors of PredSU.
2903  for (const SDep &PredSucc : PredSU->Succs) {
2904  SUnit *PredSuccSU = PredSucc.getSUnit();
2905  if (PredSuccSU == &SU) continue;
2906  // If PredSU has another successor with no data successors, for
2907  // now don't attempt to choose either over the other.
2908  if (PredSuccSU->NumSuccs == 0)
2909  goto outer_loop_continue;
2910  // Don't break physical register dependencies.
2911  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2912  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2913  goto outer_loop_continue;
2914  // Don't introduce graph cycles.
2915  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2916  goto outer_loop_continue;
2917  }
2918 
2919  // Ok, the transformation is safe and the heuristics suggest it is
2920  // profitable. Update the graph.
2921  DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
2922  << " next to PredSU #" << PredSU->NodeNum
2923  << " to guide scheduling in the presence of multiple uses\n");
2924  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2925  SDep Edge = PredSU->Succs[i];
2926  assert(!Edge.isAssignedRegDep());
2927  SUnit *SuccSU = Edge.getSUnit();
2928  if (SuccSU != &SU) {
2929  Edge.setSUnit(PredSU);
2930  scheduleDAG->RemovePred(SuccSU, Edge);
2931  scheduleDAG->AddPred(&SU, Edge);
2932  Edge.setSUnit(&SU);
2933  scheduleDAG->AddPred(SuccSU, Edge);
2934  --i;
2935  }
2936  }
2937  outer_loop_continue:;
2938  }
2939 }
2940 
2941 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2942 /// it as a def&use operand, add a pseudo control edge from it to the other
2943 /// node (if it won't create a cycle) so the two-address one will be scheduled
2944 /// first (lower in the schedule). If both nodes are two-address, favor the
2945 /// one that has a CopyToReg use (more likely to be a loop induction update).
2946 /// If both are two-address, but one is commutable while the other is not
2947 /// commutable, favor the one that's not commutable.
2948 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2949  for (SUnit &SU : *SUnits) {
2950  if (!SU.isTwoAddress)
2951  continue;
2952 
2953  SDNode *Node = SU.getNode();
2954  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
2955  continue;
2956 
2957  bool isLiveOut = hasOnlyLiveOutUses(&SU);
2958  unsigned Opc = Node->getMachineOpcode();
2959  const MCInstrDesc &MCID = TII->get(Opc);
2960  unsigned NumRes = MCID.getNumDefs();
2961  unsigned NumOps = MCID.getNumOperands() - NumRes;
2962  for (unsigned j = 0; j != NumOps; ++j) {
2963  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2964  continue;
2965  SDNode *DU = SU.getNode()->getOperand(j).getNode();
2966  if (DU->getNodeId() == -1)
2967  continue;
2968  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2969  if (!DUSU)
2970  continue;
2971  for (const SDep &Succ : DUSU->Succs) {
2972  if (Succ.isCtrl())
2973  continue;
2974  SUnit *SuccSU = Succ.getSUnit();
2975  if (SuccSU == &SU)
2976  continue;
2977  // Be conservative. Ignore if nodes aren't at roughly the same
2978  // depth and height.
2979  if (SuccSU->getHeight() < SU.getHeight() &&
2980  (SU.getHeight() - SuccSU->getHeight()) > 1)
2981  continue;
2982  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2983  // constrains whatever is using the copy, instead of the copy
2984  // itself. In the case that the copy is coalesced, this
2985  // preserves the intent of the pseudo two-address heuristics.
2986  while (SuccSU->Succs.size() == 1 &&
2987  SuccSU->getNode()->isMachineOpcode() &&
2988  SuccSU->getNode()->getMachineOpcode() ==
2989  TargetOpcode::COPY_TO_REGCLASS)
2990  SuccSU = SuccSU->Succs.front().getSUnit();
2991  // Don't constrain non-instruction nodes.
2992  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2993  continue;
2994  // Don't constrain nodes with physical register defs if the
2995  // predecessor can clobber them.
2996  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
2997  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
2998  continue;
2999  }
3000  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3001  // these may be coalesced away. We want them close to their uses.
3002  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3003  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3004  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3005  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3006  continue;
3007  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3008  (!canClobber(SuccSU, DUSU) ||
3009  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3010  (!SU.isCommutable && SuccSU->isCommutable)) &&
3011  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3012  DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
3013  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3014  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3015  }
3016  }
3017  }
3018  }
3019 }
3020 
3021 //===----------------------------------------------------------------------===//
3022 // Public Constructor Functions
3023 //===----------------------------------------------------------------------===//
3024 
3025 llvm::ScheduleDAGSDNodes *
3026 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3027  CodeGenOpt::Level OptLevel) {
3028  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3029  const TargetInstrInfo *TII = STI.getInstrInfo();
3030  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3031 
3032  BURegReductionPriorityQueue *PQ =
3033  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3034  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3035  PQ->setScheduleDAG(SD);
3036  return SD;
3037 }
3038 
3039 llvm::ScheduleDAGSDNodes *
3040 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3041  CodeGenOpt::Level OptLevel) {
3042  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3043  const TargetInstrInfo *TII = STI.getInstrInfo();
3044  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3045 
3046  SrcRegReductionPriorityQueue *PQ =
3047  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3048  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3049  PQ->setScheduleDAG(SD);
3050  return SD;
3051 }
3052 
3053 llvm::ScheduleDAGSDNodes *
3054 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3055  CodeGenOpt::Level OptLevel) {
3056  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3057  const TargetInstrInfo *TII = STI.getInstrInfo();
3058  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3059  const TargetLowering *TLI = IS->TLI;
3060 
3061  HybridBURRPriorityQueue *PQ =
3062  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3063 
3064  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3065  PQ->setScheduleDAG(SD);
3066  return SD;
3067 }
3068 
3069 llvm::ScheduleDAGSDNodes *
3070 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3071  CodeGenOpt::Level OptLevel) {
3072  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3073  const TargetInstrInfo *TII = STI.getInstrInfo();
3074  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3075  const TargetLowering *TLI = IS->TLI;
3076 
3077  ILPBURRPriorityQueue *PQ =
3078  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3079  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3080  PQ->setScheduleDAG(SD);
3081  return SD;
3082 }
Returns the register class associated with the enumeration value.
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:289
This interface is used to plug different priorities computation algorithms into the list scheduler...
Definition: ScheduleDAG.h:506
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:554
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:274
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
virtual void releaseState()=0
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
const TargetLowering * TLI
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the &#39;representative&#39; register class for the specified value type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
Reg
All possible values of the reg field in the ModR/M byte.
iterator_range< regclass_iterator > regclasses() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:556
static bool isRegDefEarlyClobberKind(unsigned Flag)
Definition: InlineAsm.h:278
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:633
unsigned getNumRegClasses() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:250
SUnit * OrigNode
If not this, the node from which this node was cloned.
Definition: ScheduleDAG.h:255
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
bool isPending
True once pending.
Definition: ScheduleDAG.h:287
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:162
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
static bool isRegDefKind(unsigned Flag)
Definition: InlineAsm.h:275
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
TargetInstrInfo - Interface to description of machine instruction set.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit *> LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask, and add them to LRegs.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:794
Scheduling dependency.
Definition: ScheduleDAG.h:50
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
void setHeightToAtLeast(unsigned NewHeight)
If NewDepth is greater than this node&#39;s depth value, set it to be the new height value.
bool isAvailable()
Definition: Compression.cpp:58
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:404
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
const MCPhysReg * getImplicitDefs() const
Return a list of registers that are potentially written by any instance of this machine instruction...
Definition: MCInstrDesc.h:534
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
Machine Value Type.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle...
bool isOptionalDef() const
Set if this operand is a optional def.
Definition: MCInstrDesc.h:99
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:238
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
virtual void addNode(const SUnit *SU)=0
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
virtual bool empty() const =0
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler...
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
iterator_range< value_op_iterator > op_values() const
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
const SDValue & getOperand(unsigned Num) const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
static bool isOperandOf(const SUnit *SU, SDNode *N)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:173
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag...
Definition: InlineAsm.h:336
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:279
unsigned getCallFrameSetupOpcode() const
These methods return the opcode of the frame setup/destroy instructions if they exist (-1 otherwise)...
MCRegAliasIterator enumerates all registers aliasing Reg.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU&#39;s physical register defs...
static void resetVRegCycle(SUnit *SU)
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle...
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:295
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
RegDefIter - In place iteration over the values defined by an SUnit.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:638
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:187
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
#define E
Definition: LargeTest.cpp:27
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:839
virtual void remove(SUnit *SU)=0
bool regsOverlap(unsigned regA, unsigned regB) const
Returns true if the two registers are equal or alias each other.
static void initVRegCycle(SUnit *SU)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
const size_t N
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:309
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:552
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:281
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
TargetSubtargetInfo - Generic base class for all target subtargets.
int getNodeId() const
Return the unique node id.
bool isAvailable
True once available.
Definition: ScheduleDAG.h:288
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:270
static std::vector< std::string > Flags
Definition: FlagsTest.cpp:8
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1198
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:37
unsigned short NumRegDefsLeft
of reg defs with no scheduled use.
Definition: ScheduleDAG.h:277
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII)
getPhysicalRegisterVT - Returns the ValueType of the physical register definition of the specified no...
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:530
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
#define DEBUG(X)
Definition: Debug.h:118
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:291
void dumpAll(const ScheduleDAG *G) const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:286
virtual SUnit * pop()=0
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:444
virtual HazardType getHazardType(SUnit *m, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
#define D
Definition: LargeTest.cpp:26
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
This file describes how to lower LLVM code to machine code.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:872
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.