LLVM  6.0.0svn
ScheduleDAGRRList.cpp
1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "ScheduleDAGSDNodes.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/MC/MCInstrDesc.h"
42 #include "llvm/MC/MCRegisterInfo.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/Debug.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <cstdlib>
54 #include <iterator>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 #include <vector>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "pre-RA-sched"
63 
64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65 STATISTIC(NumUnfolds, "Number of nodes unfolded");
66 STATISTIC(NumDups, "Number of duplicated nodes");
67 STATISTIC(NumPRCopies, "Number of physical register copies");
68 
69 static RegisterScheduler
70  burrListDAGScheduler("list-burr",
71  "Bottom-up register reduction list scheduling",
72  createBURRListDAGScheduler);
73
74 static RegisterScheduler
75  sourceListDAGScheduler("source",
76  "Similar to list-burr but schedules in source "
77  "order when possible",
78  createSourceListDAGScheduler);
79
80 static RegisterScheduler
81  hybridListDAGScheduler("list-hybrid",
82  "Bottom-up register pressure aware list scheduling "
83  "which tries to balance latency and register pressure",
84  createHybridListDAGScheduler);
85
86 static RegisterScheduler
87  ILPListDAGScheduler("list-ilp",
88  "Bottom-up register pressure aware list scheduling "
89  "which tries to balance ILP and register pressure",
90  createILPListDAGScheduler);
91
92 static cl::opt<bool> DisableSchedCycles(
93  "disable-sched-cycles", cl::Hidden, cl::init(false),
94  cl::desc("Disable cycle-level precision during preRA scheduling"));
95 
96 // Temporary sched=list-ilp flags until the heuristics are robust.
97 // Some options are also available under sched=list-hybrid.
98 static cl::opt<bool> DisableSchedRegPressure(
99  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100  cl::desc("Disable regpressure priority in sched=list-ilp"));
101 static cl::opt<bool> DisableSchedLiveUses(
102  "disable-sched-live-uses", cl::Hidden, cl::init(true),
103  cl::desc("Disable live use priority in sched=list-ilp"));
104 static cl::opt<bool> DisableSchedVRegCycle(
105  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106  cl::desc("Disable virtual register cycle interference checks"));
107 static cl::opt<bool> DisableSchedPhysRegJoin(
108  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109  cl::desc("Disable physreg def-use affinity"));
110 static cl::opt<bool> DisableSchedStalls(
111  "disable-sched-stalls", cl::Hidden, cl::init(true),
112  cl::desc("Disable no-stall priority in sched=list-ilp"));
113 static cl::opt<bool> DisableSchedCriticalPath(
114  "disable-sched-critical-path", cl::Hidden, cl::init(false),
115  cl::desc("Disable critical path priority in sched=list-ilp"));
116 static cl::opt<bool> DisableSchedHeight(
117  "disable-sched-height", cl::Hidden, cl::init(false),
118  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
119 static cl::opt<bool> Disable2AddrHack(
120  "disable-2addr-hack", cl::Hidden, cl::init(true),
121  cl::desc("Disable scheduler's two-address hack"));
122 
123 static cl::opt<int> MaxReorderWindow(
124  "max-sched-reorder", cl::Hidden, cl::init(6),
125  cl::desc("Number of instructions to allow ahead of the critical path "
126  "in sched=list-ilp"));
127 
128 static cl::opt<unsigned> AvgIPC(
129  "sched-avg-ipc", cl::Hidden, cl::init(1),
130  cl::desc("Average inst/cycle when no target itinerary exists."));
131 
132 namespace {
133 
134 //===----------------------------------------------------------------------===//
135 /// ScheduleDAGRRList - The actual register reduction list scheduler
136 /// implementation. This supports both top-down and bottom-up scheduling.
137 ///
138 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139 private:
140  /// NeedLatency - True if the scheduler will make use of latency information.
141  bool NeedLatency;
142 
143  /// AvailableQueue - The priority queue to use for the available SUnits.
144  SchedulingPriorityQueue *AvailableQueue;
145 
146  /// PendingQueue - This contains all of the instructions whose operands have
147  /// been issued, but their results are not ready yet (due to the latency of
148  /// the operation). Once the operands become available, the instruction is
149  /// added to the AvailableQueue.
150  std::vector<SUnit *> PendingQueue;
151 
152  /// HazardRec - The hazard recognizer to use.
153  ScheduleHazardRecognizer *HazardRec;
154 
155  /// CurCycle - The current scheduler state corresponds to this cycle.
156  unsigned CurCycle = 0;
157 
158  /// MinAvailableCycle - Cycle of the soonest available instruction.
159  unsigned MinAvailableCycle;
160 
161  /// IssueCount - Count instructions issued in this cycle
162  /// Currently valid only for bottom-up scheduling.
163  unsigned IssueCount;
164 
165  /// LiveRegDefs - A set of physical registers and their definitions
166  /// that are "live". These nodes must be scheduled before any other nodes that
167  /// modify the registers can be scheduled.
168  unsigned NumLiveRegs;
169  std::unique_ptr<SUnit*[]> LiveRegDefs;
170  std::unique_ptr<SUnit*[]> LiveRegGens;
171 
172  // Collect interferences between physical register use/defs.
173  // Each interference is an SUnit and set of physical registers.
174  SmallVector<SUnit*, 4> Interferences;
175 
176  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177
178  LRegsMapT LRegsMap;
179 
180  /// Topo - A topological ordering for SUnits which permits fast IsReachable
181  /// and similar queries.
182  ScheduleDAGTopologicalSort Topo;
183
184  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185  // DAG crawling.
186  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187 
188 public:
189  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190  SchedulingPriorityQueue *availqueue,
191  CodeGenOpt::Level OptLevel)
192  : ScheduleDAGSDNodes(mf),
193  NeedLatency(needlatency), AvailableQueue(availqueue),
194  Topo(SUnits, nullptr) {
195  const TargetSubtargetInfo &STI = mf.getSubtarget();
196  if (DisableSchedCycles || !NeedLatency)
197  HazardRec = new ScheduleHazardRecognizer();
198  else
199  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
200  }
201 
202  ~ScheduleDAGRRList() override {
203  delete HazardRec;
204  delete AvailableQueue;
205  }
206 
207  void Schedule() override;
208 
209  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
210 
211  /// IsReachable - Checks if SU is reachable from TargetSU.
212  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
213  return Topo.IsReachable(SU, TargetSU);
214  }
215 
216  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
217  /// create a cycle.
218  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
219  return Topo.WillCreateCycle(SU, TargetSU);
220  }
221 
222  /// AddPred - adds a predecessor edge to SUnit SU.
223  /// D is the dependence being added as a predecessor.
224  /// Updates the topological ordering if required.
225  void AddPred(SUnit *SU, const SDep &D) {
226  Topo.AddPred(SU, D.getSUnit());
227  SU->addPred(D);
228  }
229 
230  /// RemovePred - removes a predecessor edge from SUnit SU.
231  /// D is the dependence being removed.
232  /// Updates the topological ordering if required.
233  void RemovePred(SUnit *SU, const SDep &D) {
234  Topo.RemovePred(SU, D.getSUnit());
235  SU->removePred(D);
236  }
237 
238 private:
239  bool isReady(SUnit *SU) {
240  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
241  AvailableQueue->isReady(SU);
242  }
243 
244  void ReleasePred(SUnit *SU, const SDep *PredEdge);
245  void ReleasePredecessors(SUnit *SU);
246  void ReleasePending();
247  void AdvanceToCycle(unsigned NextCycle);
248  void AdvancePastStalls(SUnit *SU);
249  void EmitNode(SUnit *SU);
250  void ScheduleNodeBottomUp(SUnit*);
251  void CapturePred(SDep *PredEdge);
252  void UnscheduleNodeBottomUp(SUnit*);
253  void RestoreHazardCheckerBottomUp();
254  void BacktrackBottomUp(SUnit*, SUnit*);
255  SUnit *TryUnfoldSU(SUnit *);
256  SUnit *CopyAndMoveSuccessors(SUnit*);
257  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
258  const TargetRegisterClass*,
259  const TargetRegisterClass*,
260  SmallVectorImpl<SUnit*>&);
261  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
262 
263  void releaseInterferences(unsigned Reg = 0);
264 
265  SUnit *PickNodeToScheduleBottomUp();
266  void ListScheduleBottomUp();
267 
268  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
269  /// Updates the topological ordering if required.
270  SUnit *CreateNewSUnit(SDNode *N) {
271  unsigned NumSUnits = SUnits.size();
272  SUnit *NewNode = newSUnit(N);
273  // Update the topological ordering.
274  if (NewNode->NodeNum >= NumSUnits)
275  Topo.InitDAGTopologicalSorting();
276  return NewNode;
277  }
278 
279  /// CreateClone - Creates a new SUnit from an existing one.
280  /// Updates the topological ordering if required.
281  SUnit *CreateClone(SUnit *N) {
282  unsigned NumSUnits = SUnits.size();
283  SUnit *NewNode = Clone(N);
284  // Update the topological ordering.
285  if (NewNode->NodeNum >= NumSUnits)
286  Topo.InitDAGTopologicalSorting();
287  return NewNode;
288  }
289 
290  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
291  /// need actual latency information but the hybrid scheduler does.
292  bool forceUnitLatencies() const override {
293  return !NeedLatency;
294  }
295 };
296 
297 } // end anonymous namespace
298 
299 /// GetCostForDef - Looks up the register class and cost for a given definition.
300 /// Typically this just means looking up the representative register class,
301 /// but for untyped values (MVT::Untyped) it means inspecting the node's
302 /// opcode to determine what register class is being generated.
303 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
304  const TargetLowering *TLI,
305  const TargetInstrInfo *TII,
306  const TargetRegisterInfo *TRI,
307  unsigned &RegClass, unsigned &Cost,
308  const MachineFunction &MF) {
309  MVT VT = RegDefPos.GetValue();
310 
311  // Special handling for untyped values. These values can only come from
312  // the expansion of custom DAG-to-DAG patterns.
313  if (VT == MVT::Untyped) {
314  const SDNode *Node = RegDefPos.GetNode();
315 
316  // Special handling for CopyFromReg of untyped values.
317  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
318  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
319  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
320  RegClass = RC->getID();
321  Cost = 1;
322  return;
323  }
324 
325  unsigned Opcode = Node->getMachineOpcode();
326  if (Opcode == TargetOpcode::REG_SEQUENCE) {
327  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
328  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
329  RegClass = RC->getID();
330  Cost = 1;
331  return;
332  }
333 
334  unsigned Idx = RegDefPos.GetIdx();
335  const MCInstrDesc Desc = TII->get(Opcode);
336  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
337  RegClass = RC->getID();
338  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
339  // better way to determine it.
340  Cost = 1;
341  } else {
342  RegClass = TLI->getRepRegClassFor(VT)->getID();
343  Cost = TLI->getRepRegClassCostFor(VT);
344  }
345 }
346 
347 /// Schedule - Schedule the DAG using list scheduling.
348 void ScheduleDAGRRList::Schedule() {
349  DEBUG(dbgs()
350  << "********** List Scheduling BB#" << BB->getNumber()
351  << " '" << BB->getName() << "' **********\n");
352 
353  CurCycle = 0;
354  IssueCount = 0;
355  MinAvailableCycle =
356  DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
357  NumLiveRegs = 0;
358  // Allocate slots for each physical register, plus one for a special register
359  // to track the virtual resource of a calling sequence.
360  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
361  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
362  CallSeqEndForStart.clear();
363  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
364 
365  // Build the scheduling graph.
366  BuildSchedGraph(nullptr);
367 
368  DEBUG(for (SUnit &SU : SUnits)
369  SU.dumpAll(this));
370  Topo.InitDAGTopologicalSorting();
371
372  AvailableQueue->initNodes(SUnits);
373 
374  HazardRec->Reset();
375 
376  // Execute the actual scheduling loop.
377  ListScheduleBottomUp();
378 
379  AvailableQueue->releaseState();
380 
381  DEBUG({
382  dbgs() << "*** Final schedule ***\n";
383  dumpSchedule();
384  dbgs() << '\n';
385  });
386 }
387 
388 //===----------------------------------------------------------------------===//
389 // Bottom-Up Scheduling
390 //===----------------------------------------------------------------------===//
391 
392 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
393 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
394 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
395  SUnit *PredSU = PredEdge->getSUnit();
396 
397 #ifndef NDEBUG
398  if (PredSU->NumSuccsLeft == 0) {
399  dbgs() << "*** Scheduling failed! ***\n";
400  PredSU->dump(this);
401  dbgs() << " has been released too many times!\n";
402  llvm_unreachable(nullptr);
403  }
404 #endif
405  --PredSU->NumSuccsLeft;
406 
407  if (!forceUnitLatencies()) {
408  // Updating predecessor's height. This is now the cycle when the
409  // predecessor can be scheduled without causing a pipeline stall.
410  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
411  }
412 
413  // If all the node's successors are scheduled, this node is ready
414  // to be scheduled. Ignore the special EntrySU node.
415  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
416  PredSU->isAvailable = true;
417 
418  unsigned Height = PredSU->getHeight();
419  if (Height < MinAvailableCycle)
420  MinAvailableCycle = Height;
421 
422  if (isReady(PredSU)) {
423  AvailableQueue->push(PredSU);
424  }
425  // CapturePred and others may have left the node in the pending queue, avoid
426  // adding it twice.
427  else if (!PredSU->isPending) {
428  PredSU->isPending = true;
429  PendingQueue.push_back(PredSU);
430  }
431  }
432 }
433 
434 /// IsChainDependent - Test if Outer is reachable from Inner through
435 /// chain dependencies.
436 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
437  unsigned NestLevel,
438  const TargetInstrInfo *TII) {
439  SDNode *N = Outer;
440  while (true) {
441  if (N == Inner)
442  return true;
443  // For a TokenFactor, examine each operand. There may be multiple ways
444  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
445  // most nesting in order to ensure that we find the corresponding match.
446  if (N->getOpcode() == ISD::TokenFactor) {
447  for (const SDValue &Op : N->op_values())
448  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
449  return true;
450  return false;
451  }
452  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
453  if (N->isMachineOpcode()) {
454  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
455  ++NestLevel;
456  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
457  if (NestLevel == 0)
458  return false;
459  --NestLevel;
460  }
461  }
462  // Otherwise, find the chain and continue climbing.
463  for (const SDValue &Op : N->op_values())
464  if (Op.getValueType() == MVT::Other) {
465  N = Op.getNode();
466  goto found_chain_operand;
467  }
468  return false;
469  found_chain_operand:;
470  if (N->getOpcode() == ISD::EntryToken)
471  return false;
472  }
473 }
474 
475 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
476 /// the corresponding (lowered) CALLSEQ_BEGIN node.
477 ///
478 /// NestLevel and MaxNest are used in recursion to indicate the current level
479 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
480 /// level seen so far.
481 ///
482 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
483 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
484 static SDNode *
485 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
486  const TargetInstrInfo *TII) {
487  while (true) {
488  // For a TokenFactor, examine each operand. There may be multiple ways
489  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
490  // most nesting in order to ensure that we find the corresponding match.
491  if (N->getOpcode() == ISD::TokenFactor) {
492  SDNode *Best = nullptr;
493  unsigned BestMaxNest = MaxNest;
494  for (const SDValue &Op : N->op_values()) {
495  unsigned MyNestLevel = NestLevel;
496  unsigned MyMaxNest = MaxNest;
497  if (SDNode *New = FindCallSeqStart(Op.getNode(),
498  MyNestLevel, MyMaxNest, TII))
499  if (!Best || (MyMaxNest > BestMaxNest)) {
500  Best = New;
501  BestMaxNest = MyMaxNest;
502  }
503  }
504  assert(Best);
505  MaxNest = BestMaxNest;
506  return Best;
507  }
508  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
509  if (N->isMachineOpcode()) {
510  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
511  ++NestLevel;
512  MaxNest = std::max(MaxNest, NestLevel);
513  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
514  assert(NestLevel != 0);
515  --NestLevel;
516  if (NestLevel == 0)
517  return N;
518  }
519  }
520  // Otherwise, find the chain and continue climbing.
521  for (const SDValue &Op : N->op_values())
522  if (Op.getValueType() == MVT::Other) {
523  N = Op.getNode();
524  goto found_chain_operand;
525  }
526  return nullptr;
527  found_chain_operand:;
528  if (N->getOpcode() == ISD::EntryToken)
529  return nullptr;
530  }
531 }
532 
533 /// Call ReleasePred for each predecessor, then update register live def/gen.
534 /// Always update LiveRegDefs for a register dependence even if the current SU
535 /// also defines the register. This effectively creates one large live range
536 /// across a sequence of two-address nodes. This is important because the
537 /// entire chain must be scheduled together. Example:
538 ///
539 /// flags = (3) add
540 /// flags = (2) addc flags
541 /// flags = (1) addc flags
542 ///
543 /// results in
544 ///
545 /// LiveRegDefs[flags] = 3
546 /// LiveRegGens[flags] = 1
547 ///
548 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
549 /// interference on flags.
550 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
551  // Bottom up: release predecessors
552  for (SDep &Pred : SU->Preds) {
553  ReleasePred(SU, &Pred);
554  if (Pred.isAssignedRegDep()) {
555  // This is a physical register dependency and it's impossible or
556  // expensive to copy the register. Make sure nothing that can
557  // clobber the register is scheduled between the predecessor and
558  // this node.
559  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
560  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
561  "interference on register dependence");
562  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
563  if (!LiveRegGens[Pred.getReg()]) {
564  ++NumLiveRegs;
565  LiveRegGens[Pred.getReg()] = SU;
566  }
567  }
568  }
569 
570  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
571  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
572  // these nodes, to prevent other calls from being interscheduled with them.
573  unsigned CallResource = TRI->getNumRegs();
574  if (!LiveRegDefs[CallResource])
575  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
576  if (Node->isMachineOpcode() &&
577  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
578  unsigned NestLevel = 0;
579  unsigned MaxNest = 0;
580  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
581  assert(N && "Must find call sequence start");
582 
583  SUnit *Def = &SUnits[N->getNodeId()];
584  CallSeqEndForStart[Def] = SU;
585 
586  ++NumLiveRegs;
587  LiveRegDefs[CallResource] = Def;
588  LiveRegGens[CallResource] = SU;
589  break;
590  }
591 }
592 
593 /// Check to see if any of the pending instructions are ready to issue. If
594 /// so, add them to the available queue.
595 void ScheduleDAGRRList::ReleasePending() {
596  if (DisableSchedCycles) {
597  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
598  return;
599  }
600 
601  // If the available queue is empty, it is safe to reset MinAvailableCycle.
602  if (AvailableQueue->empty())
603  MinAvailableCycle = std::numeric_limits<unsigned>::max();
604 
605  // Check to see if any of the pending instructions are ready to issue. If
606  // so, add them to the available queue.
607  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
608  unsigned ReadyCycle = PendingQueue[i]->getHeight();
609  if (ReadyCycle < MinAvailableCycle)
610  MinAvailableCycle = ReadyCycle;
611 
612  if (PendingQueue[i]->isAvailable) {
613  if (!isReady(PendingQueue[i]))
614  continue;
615  AvailableQueue->push(PendingQueue[i]);
616  }
617  PendingQueue[i]->isPending = false;
618  PendingQueue[i] = PendingQueue.back();
619  PendingQueue.pop_back();
620  --i; --e;
621  }
622 }
623 
624 /// Move the scheduler state forward to the specified cycle.
625 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
626  if (NextCycle <= CurCycle)
627  return;
628 
629  IssueCount = 0;
630  AvailableQueue->setCurCycle(NextCycle);
631  if (!HazardRec->isEnabled()) {
632  // Bypass lots of virtual calls in case of long latency.
633  CurCycle = NextCycle;
634  }
635  else {
636  for (; CurCycle != NextCycle; ++CurCycle) {
637  HazardRec->RecedeCycle();
638  }
639  }
640  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
641  // available Q to release pending nodes at least once before popping.
642  ReleasePending();
643 }
644 
645 /// Move the scheduler state forward until the specified node's dependents are
646 /// ready and can be scheduled with no resource conflicts.
647 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
648  if (DisableSchedCycles)
649  return;
650 
651  // FIXME: Nodes such as CopyFromReg probably should not advance the current
652  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
653  // has predecessors the cycle will be advanced when they are scheduled.
654  // But given the crude nature of modeling latency though such nodes, we
655  // currently need to treat these nodes like real instructions.
656  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
657 
658  unsigned ReadyCycle = SU->getHeight();
659 
660  // Bump CurCycle to account for latency. We assume the latency of other
661  // available instructions may be hidden by the stall (not a full pipe stall).
662  // This updates the hazard recognizer's cycle before reserving resources for
663  // this instruction.
664  AdvanceToCycle(ReadyCycle);
665 
666  // Calls are scheduled in their preceding cycle, so don't conflict with
667  // hazards from instructions after the call. EmitNode will reset the
668  // scoreboard state before emitting the call.
669  if (SU->isCall)
670  return;
671 
672  // FIXME: For resource conflicts in very long non-pipelined stages, we
673  // should probably skip ahead here to avoid useless scoreboard checks.
674  int Stalls = 0;
675  while (true) {
676  ScheduleHazardRecognizer::HazardType HT =
677  HazardRec->getHazardType(SU, -Stalls);
678 
679  if (HT == ScheduleHazardRecognizer::NoHazard)
680  break;
681 
682  ++Stalls;
683  }
684  AdvanceToCycle(CurCycle + Stalls);
685 }
686 
687 /// Record this SUnit in the HazardRecognizer.
688 /// Does not update CurCycle.
689 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
690  if (!HazardRec->isEnabled())
691  return;
692 
693  // Check for phys reg copy.
694  if (!SU->getNode())
695  return;
696 
697  switch (SU->getNode()->getOpcode()) {
698  default:
699  assert(SU->getNode()->isMachineOpcode() &&
700  "This target-independent node should not be scheduled.");
701  break;
702  case ISD::MERGE_VALUES:
703  case ISD::TokenFactor:
704  case ISD::LIFETIME_START:
705  case ISD::LIFETIME_END:
706  case ISD::CopyToReg:
707  case ISD::CopyFromReg:
708  case ISD::EH_LABEL:
709  // Noops don't affect the scoreboard state. Copies are likely to be
710  // removed.
711  return;
712  case ISD::INLINEASM:
713  // For inline asm, clear the pipeline state.
714  HazardRec->Reset();
715  return;
716  }
717  if (SU->isCall) {
718  // Calls are scheduled with their preceding instructions. For bottom-up
719  // scheduling, clear the pipeline state before emitting.
720  HazardRec->Reset();
721  }
722 
723  HazardRec->EmitInstruction(SU);
724 }
725 
726 static void resetVRegCycle(SUnit *SU);
727 
728 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
729 /// count of its predecessors. If a predecessor pending count is zero, add it to
730 /// the Available queue.
731 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
732  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
733  DEBUG(SU->dump(this));
734 
735 #ifndef NDEBUG
736  if (CurCycle < SU->getHeight())
737  DEBUG(dbgs() << " Height [" << SU->getHeight()
738  << "] pipeline stall!\n");
739 #endif
740 
741  // FIXME: Do not modify node height. It may interfere with
742  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
743  // node its ready cycle can aid heuristics, and after scheduling it can
744  // indicate the scheduled cycle.
745  SU->setHeightToAtLeast(CurCycle);
746 
747  // Reserve resources for the scheduled instruction.
748  EmitNode(SU);
749 
750  Sequence.push_back(SU);
751 
752  AvailableQueue->scheduledNode(SU);
753 
754  // If HazardRec is disabled, and each inst counts as one cycle, then
755  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
756  // PendingQueue for schedulers that implement HasReadyFilter.
757  if (!HazardRec->isEnabled() && AvgIPC < 2)
758  AdvanceToCycle(CurCycle + 1);
759 
760  // Update liveness of predecessors before successors to avoid treating a
761  // two-address node as a live range def.
762  ReleasePredecessors(SU);
763 
764  // Release all the implicit physical register defs that are live.
765  for (SDep &Succ : SU->Succs) {
766  // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
767  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
768  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
769  --NumLiveRegs;
770  LiveRegDefs[Succ.getReg()] = nullptr;
771  LiveRegGens[Succ.getReg()] = nullptr;
772  releaseInterferences(Succ.getReg());
773  }
774  }
775  // Release the special call resource dependence, if this is the beginning
776  // of a call.
777  unsigned CallResource = TRI->getNumRegs();
778  if (LiveRegDefs[CallResource] == SU)
779  for (const SDNode *SUNode = SU->getNode(); SUNode;
780  SUNode = SUNode->getGluedNode()) {
781  if (SUNode->isMachineOpcode() &&
782  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
783  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
784  --NumLiveRegs;
785  LiveRegDefs[CallResource] = nullptr;
786  LiveRegGens[CallResource] = nullptr;
787  releaseInterferences(CallResource);
788  }
789  }
790 
791  resetVRegCycle(SU);
792 
793  SU->isScheduled = true;
794 
795  // Conditions under which the scheduler should eagerly advance the cycle:
796  // (1) No available instructions
797  // (2) All pipelines full, so available instructions must have hazards.
798  //
799  // If HazardRec is disabled, the cycle was pre-advanced before calling
800  // ReleasePredecessors. In that case, IssueCount should remain 0.
801  //
802  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
803  if (HazardRec->isEnabled() || AvgIPC > 1) {
804  if (SU->getNode() && SU->getNode()->isMachineOpcode())
805  ++IssueCount;
806  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
807  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
808  AdvanceToCycle(CurCycle + 1);
809  }
810 }
811 
812 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
813 /// unscheduled, increase the succ left count of its predecessors. Remove
814 /// them from AvailableQueue if necessary.
815 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
816  SUnit *PredSU = PredEdge->getSUnit();
817  if (PredSU->isAvailable) {
818  PredSU->isAvailable = false;
819  if (!PredSU->isPending)
820  AvailableQueue->remove(PredSU);
821  }
822 
823  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
824  "NumSuccsLeft will overflow!");
825  ++PredSU->NumSuccsLeft;
826 }
827 
828 /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
829 /// state and its predecessors' states to reflect the change.
830 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
831  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
832  DEBUG(SU->dump(this));
833 
834  for (SDep &Pred : SU->Preds) {
835  CapturePred(&Pred);
836  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
837  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
838  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
839  "Physical register dependency violated?");
840  --NumLiveRegs;
841  LiveRegDefs[Pred.getReg()] = nullptr;
842  LiveRegGens[Pred.getReg()] = nullptr;
843  releaseInterferences(Pred.getReg());
844  }
845  }
846 
847  // Reclaim the special call resource dependence, if this is the beginning
848  // of a call.
849  unsigned CallResource = TRI->getNumRegs();
850  for (const SDNode *SUNode = SU->getNode(); SUNode;
851  SUNode = SUNode->getGluedNode()) {
852  if (SUNode->isMachineOpcode() &&
853  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
854  SUnit *SeqEnd = CallSeqEndForStart[SU];
855  assert(SeqEnd && "Call sequence start/end must be known");
856  assert(!LiveRegDefs[CallResource]);
857  assert(!LiveRegGens[CallResource]);
858  ++NumLiveRegs;
859  LiveRegDefs[CallResource] = SU;
860  LiveRegGens[CallResource] = SeqEnd;
861  }
862  }
863 
864  // Release the special call resource dependence, if this is the end
865  // of a call.
866  if (LiveRegGens[CallResource] == SU)
867  for (const SDNode *SUNode = SU->getNode(); SUNode;
868  SUNode = SUNode->getGluedNode()) {
869  if (SUNode->isMachineOpcode() &&
870  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
871  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
872  assert(LiveRegDefs[CallResource]);
873  assert(LiveRegGens[CallResource]);
874  --NumLiveRegs;
875  LiveRegDefs[CallResource] = nullptr;
876  LiveRegGens[CallResource] = nullptr;
877  releaseInterferences(CallResource);
878  }
879  }
880 
881  for (auto &Succ : SU->Succs) {
882  if (Succ.isAssignedRegDep()) {
883  auto Reg = Succ.getReg();
884  if (!LiveRegDefs[Reg])
885  ++NumLiveRegs;
886  // This becomes the nearest def. Note that an earlier def may still be
887  // pending if this is a two-address node.
888  LiveRegDefs[Reg] = SU;
889 
890  // Update LiveRegGens only if it was empty before this unscheduling.
891  // This avoids incorrectly updating a LiveRegGens entry set in a previous run.
892  if (!LiveRegGens[Reg]) {
893  // Find the successor with the lowest height.
894  LiveRegGens[Reg] = Succ.getSUnit();
895  for (auto &Succ2 : SU->Succs) {
896  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
897  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
898  LiveRegGens[Reg] = Succ2.getSUnit();
899  }
900  }
901  }
902  }
903  if (SU->getHeight() < MinAvailableCycle)
904  MinAvailableCycle = SU->getHeight();
905 
906  SU->setHeightDirty();
907  SU->isScheduled = false;
908  SU->isAvailable = true;
909  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
910  // Don't make available until backtracking is complete.
911  SU->isPending = true;
912  PendingQueue.push_back(SU);
913  }
914  else {
915  AvailableQueue->push(SU);
916  }
917  AvailableQueue->unscheduledNode(SU);
918 }
919 
920 /// After backtracking, the hazard checker needs to be restored to a state
921 /// corresponding to the current cycle.
922 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
923  HazardRec->Reset();
924 
925  unsigned LookAhead = std::min((unsigned)Sequence.size(),
926  HazardRec->getMaxLookAhead());
927  if (LookAhead == 0)
928  return;
929 
930  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
931  unsigned HazardCycle = (*I)->getHeight();
932  for (auto E = Sequence.end(); I != E; ++I) {
933  SUnit *SU = *I;
934  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
935  HazardRec->RecedeCycle();
936  }
937  EmitNode(SU);
938  }
939 }
940 
941 /// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
942 /// scheduled, in order to make it possible to schedule SU.
943 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
944  SUnit *OldSU = Sequence.back();
945  while (true) {
946  Sequence.pop_back();
947  // FIXME: use ready cycle instead of height
948  CurCycle = OldSU->getHeight();
949  UnscheduleNodeBottomUp(OldSU);
950  AvailableQueue->setCurCycle(CurCycle);
951  if (OldSU == BtSU)
952  break;
953  OldSU = Sequence.back();
954  }
955 
956  assert(!SU->isSucc(OldSU) && "Something is wrong!");
957 
958  RestoreHazardCheckerBottomUp();
959 
960  ReleasePending();
961 
962  ++NumBacktracks;
963 }
964 
965 static bool isOperandOf(const SUnit *SU, SDNode *N) {
966  for (const SDNode *SUNode = SU->getNode(); SUNode;
967  SUNode = SUNode->getGluedNode()) {
968  if (SUNode->isOperandOf(N))
969  return true;
970  }
971  return false;
972 }
973 
974 /// TryUnfoldSU - Attempt to unfold a load-folding node into separate nodes.
975 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
976  SDNode *N = SU->getNode();
977  // Use while over if to ease fall through.
978  SmallVector<SDNode *, 2> NewNodes;
979  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
980  return nullptr;
981 
982  // Unfolding an x86 DEC64m operation results in a store, dec, and load, which
983  // can't be handled here, so quit.
984  if (NewNodes.size() == 3)
985  return nullptr;
986 
987  assert(NewNodes.size() == 2 && "Expected a load folding node!");
988 
989  N = NewNodes[1];
990  SDNode *LoadNode = NewNodes[0];
991  unsigned NumVals = N->getNumValues();
992  unsigned OldNumVals = SU->getNode()->getNumValues();
993 
994  // LoadNode may already exist. This can happen when there is another
995  // load from the same location that produces the same type of value
996  // but has different alignment or volatility.
997  bool isNewLoad = true;
998  SUnit *LoadSU;
999  if (LoadNode->getNodeId() != -1) {
1000  LoadSU = &SUnits[LoadNode->getNodeId()];
1001  // If LoadSU has already been scheduled, we should clone it but
1002  // this would negate the benefit to unfolding so just return SU.
1003  if (LoadSU->isScheduled)
1004  return SU;
1005  isNewLoad = false;
1006  } else {
1007  LoadSU = CreateNewSUnit(LoadNode);
1008  LoadNode->setNodeId(LoadSU->NodeNum);
1009 
1010  InitNumRegDefsLeft(LoadSU);
1011  computeLatency(LoadSU);
1012  }
1013 
1014  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1015 
1016  // Now that we are committed to unfolding, replace DAG uses.
1017  for (unsigned i = 0; i != NumVals; ++i)
1018  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1019  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1020  SDValue(LoadNode, 1));
1021 
1022  SUnit *NewSU = CreateNewSUnit(N);
1023  assert(N->getNodeId() == -1 && "Node already inserted!");
1024  N->setNodeId(NewSU->NodeNum);
1025 
1026  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1027  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1028  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1029  NewSU->isTwoAddress = true;
1030  break;
1031  }
1032  }
1033  if (MCID.isCommutable())
1034  NewSU->isCommutable = true;
1035 
1036  InitNumRegDefsLeft(NewSU);
1037  computeLatency(NewSU);
1038 
1039  // Record all the edges to and from the old SU, by category.
1040  SmallVector<SDep, 4> ChainPreds;
1041  SmallVector<SDep, 4> ChainSuccs;
1042  SmallVector<SDep, 4> LoadPreds;
1043  SmallVector<SDep, 4> NodePreds;
1044  SmallVector<SDep, 4> NodeSuccs;
1045  for (SDep &Pred : SU->Preds) {
1046  if (Pred.isCtrl())
1047  ChainPreds.push_back(Pred);
1048  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1049  LoadPreds.push_back(Pred);
1050  else
1051  NodePreds.push_back(Pred);
1052  }
1053  for (SDep &Succ : SU->Succs) {
1054  if (Succ.isCtrl())
1055  ChainSuccs.push_back(Succ);
1056  else
1057  NodeSuccs.push_back(Succ);
1058  }
1059 
1060  // Now assign edges to the newly-created nodes.
1061  for (const SDep &Pred : ChainPreds) {
1062  RemovePred(SU, Pred);
1063  if (isNewLoad)
1064  AddPred(LoadSU, Pred);
1065  }
1066  for (const SDep &Pred : LoadPreds) {
1067  RemovePred(SU, Pred);
1068  if (isNewLoad)
1069  AddPred(LoadSU, Pred);
1070  }
1071  for (const SDep &Pred : NodePreds) {
1072  RemovePred(SU, Pred);
1073  AddPred(NewSU, Pred);
1074  }
1075  for (SDep D : NodeSuccs) {
1076  SUnit *SuccDep = D.getSUnit();
1077  D.setSUnit(SU);
1078  RemovePred(SuccDep, D);
1079  D.setSUnit(NewSU);
1080  AddPred(SuccDep, D);
1081  // Balance register pressure.
1082  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1083  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1084  --NewSU->NumRegDefsLeft;
1085  }
1086  for (SDep D : ChainSuccs) {
1087  SUnit *SuccDep = D.getSUnit();
1088  D.setSUnit(SU);
1089  RemovePred(SuccDep, D);
1090  if (isNewLoad) {
1091  D.setSUnit(LoadSU);
1092  AddPred(SuccDep, D);
1093  }
1094  }
1095 
1096  // Add a data dependency to reflect that NewSU reads the value defined
1097  // by LoadSU.
1098  SDep D(LoadSU, SDep::Data, 0);
1099  D.setLatency(LoadSU->Latency);
1100  AddPred(NewSU, D);
1101 
1102  if (isNewLoad)
1103  AvailableQueue->addNode(LoadSU);
1104  AvailableQueue->addNode(NewSU);
1105 
1106  ++NumUnfolds;
1107 
1108  if (NewSU->NumSuccsLeft == 0)
1109  NewSU->isAvailable = true;
1110 
1111  return NewSU;
1112 }
1113 
1114 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1115 /// successors to the newly created node.
1116 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1117  SDNode *N = SU->getNode();
1118  if (!N)
1119  return nullptr;
1120 
1121  if (SU->getNode()->getGluedNode())
1122  return nullptr;
1123 
1124  SUnit *NewSU;
1125  bool TryUnfold = false;
1126  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1127  MVT VT = N->getSimpleValueType(i);
1128  if (VT == MVT::Glue)
1129  return nullptr;
1130  else if (VT == MVT::Other)
1131  TryUnfold = true;
1132  }
1133  for (const SDValue &Op : N->op_values()) {
1134  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1135  if (VT == MVT::Glue)
1136  return nullptr;
1137  }
1138 
1139  // If possible, unfold the instruction.
1140  if (TryUnfold) {
1141  SUnit *UnfoldSU = TryUnfoldSU(SU);
1142  if (!UnfoldSU)
1143  return nullptr;
1144  SU = UnfoldSU;
1145  N = SU->getNode();
1146  // If this can be scheduled, don't bother duplicating; just return.
1147  if (SU->NumSuccsLeft == 0)
1148  return SU;
1149  }
1150 
1151  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1152  NewSU = CreateClone(SU);
1153 
1154  // New SUnit has the exact same predecessors.
1155  for (SDep &Pred : SU->Preds)
1156  if (!Pred.isArtificial())
1157  AddPred(NewSU, Pred);
1158 
1159  // Only copy scheduled successors. Cut them from old node's successor
1160  // list and move them over.
1161  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1162  for (SDep &Succ : SU->Succs) {
1163  if (Succ.isArtificial())
1164  continue;
1165  SUnit *SuccSU = Succ.getSUnit();
1166  if (SuccSU->isScheduled) {
1167  SDep D = Succ;
1168  D.setSUnit(NewSU);
1169  AddPred(SuccSU, D);
1170  D.setSUnit(SU);
1171  DelDeps.push_back(std::make_pair(SuccSU, D));
1172  }
1173  }
1174  for (auto &DelDep : DelDeps)
1175  RemovePred(DelDep.first, DelDep.second);
1176 
1177  AvailableQueue->updateNode(SU);
1178  AvailableQueue->addNode(NewSU);
1179 
1180  ++NumDups;
1181  return NewSU;
1182 }
1183 
1184 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1185 /// scheduled successors of the given SUnit to the last copy.
1186 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1187  const TargetRegisterClass *DestRC,
1188  const TargetRegisterClass *SrcRC,
1189  SmallVectorImpl<SUnit*> &Copies) {
1190  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1191  CopyFromSU->CopySrcRC = SrcRC;
1192  CopyFromSU->CopyDstRC = DestRC;
1193 
1194  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1195  CopyToSU->CopySrcRC = DestRC;
1196  CopyToSU->CopyDstRC = SrcRC;
1197 
1198  // Only copy scheduled successors. Cut them from old node's successor
1199  // list and move them over.
1200  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1201  for (SDep &Succ : SU->Succs) {
1202  if (Succ.isArtificial())
1203  continue;
1204  SUnit *SuccSU = Succ.getSUnit();
1205  if (SuccSU->isScheduled) {
1206  SDep D = Succ;
1207  D.setSUnit(CopyToSU);
1208  AddPred(SuccSU, D);
1209  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1210  }
1211  else {
1212  // Avoid scheduling the def-side copy before other successors. Otherwise
1213  // we could introduce another physreg interference on the copy and
1214  // continue inserting copies indefinitely.
1215  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1216  }
1217  }
1218  for (auto &DelDep : DelDeps)
1219  RemovePred(DelDep.first, DelDep.second);
1220 
1221  SDep FromDep(SU, SDep::Data, Reg);
1222  FromDep.setLatency(SU->Latency);
1223  AddPred(CopyFromSU, FromDep);
1224  SDep ToDep(CopyFromSU, SDep::Data, 0);
1225  ToDep.setLatency(CopyFromSU->Latency);
1226  AddPred(CopyToSU, ToDep);
1227 
1228  AvailableQueue->updateNode(SU);
1229  AvailableQueue->addNode(CopyFromSU);
1230  AvailableQueue->addNode(CopyToSU);
1231  Copies.push_back(CopyFromSU);
1232  Copies.push_back(CopyToSU);
1233 
1234  ++NumPRCopies;
1235 }
1236 
1237 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1238 /// definition of the specified node.
1239 /// FIXME: Move to SelectionDAG?
1240 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1241  const TargetInstrInfo *TII) {
1242  unsigned NumRes;
1243  if (N->getOpcode() == ISD::CopyFromReg) {
1244  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1245  NumRes = 1;
1246  } else {
1247  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1248  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1249  NumRes = MCID.getNumDefs();
1250  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1251  if (Reg == *ImpDef)
1252  break;
1253  ++NumRes;
1254  }
1255  }
1256  return N->getSimpleValueType(NumRes);
1257 }
1258 
1259 /// CheckForLiveRegDef - Update the set of interfering registers (LRegs) if the
1260 /// specified register def of the specified SUnit clobbers any "live" registers.
1261 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1262  SUnit **LiveRegDefs,
1263  SmallSet<unsigned, 4> &RegAdded,
1264  SmallVectorImpl<unsigned> &LRegs,
1265  const TargetRegisterInfo *TRI) {
1266  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1267 
1268  // Check if Reg is live.
1269  if (!LiveRegDefs[*AliasI]) continue;
1270 
1271  // Allow multiple uses of the same def.
1272  if (LiveRegDefs[*AliasI] == SU) continue;
1273 
1274  // Add Reg to the set of interfering live regs.
1275  if (RegAdded.insert(*AliasI).second) {
1276  LRegs.push_back(*AliasI);
1277  }
1278  }
1279 }
1280 
1281 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1282 /// by RegMask, and add them to LRegs.
1283 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1284  ArrayRef<SUnit*> LiveRegDefs,
1285  SmallSet<unsigned, 4> &RegAdded,
1286  SmallVectorImpl<unsigned> &LRegs) {
1287  // Look at all live registers. Skip Reg0 and the special CallResource.
1288  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1289  if (!LiveRegDefs[i]) continue;
1290  if (LiveRegDefs[i] == SU) continue;
1291  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1292  if (RegAdded.insert(i).second)
1293  LRegs.push_back(i);
1294  }
1295 }
1296 
1297 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1298 static const uint32_t *getNodeRegMask(const SDNode *N) {
1299  for (const SDValue &Op : N->op_values())
1300  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1301  return RegOp->getRegMask();
1302  return nullptr;
1303 }
1304 
1305 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1306 /// scheduling of the given node to satisfy live physical register dependencies.
1307 /// If the specified node is the last one that's available to schedule, do
1308 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1309 bool ScheduleDAGRRList::
1310 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1311  if (NumLiveRegs == 0)
1312  return false;
1313 
1314  SmallSet<unsigned, 4> RegAdded;
1315  // If this node would clobber any "live" register, then it's not ready.
1316  //
1317  // If SU is the currently live definition of the same register that it uses,
1318  // then we are free to schedule it.
1319  for (SDep &Pred : SU->Preds) {
1320  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1321  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1322  RegAdded, LRegs, TRI);
1323  }
1324 
1325  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1326  if (Node->getOpcode() == ISD::INLINEASM) {
1327  // Inline asm can clobber physical defs.
1328  unsigned NumOps = Node->getNumOperands();
1329  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1330  --NumOps; // Ignore the glue operand.
1331 
1332  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1333  unsigned Flags =
1334  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1335  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1336 
1337  ++i; // Skip the ID value.
1338  if (InlineAsm::isRegDefKind(Flags) ||
1339  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1340  InlineAsm::isClobberKind(Flags)) {
1341  // Check for def of register or earlyclobber register.
1342  for (; NumVals; --NumVals, ++i) {
1343  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1344  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1345  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1346  }
1347  } else
1348  i += NumVals;
1349  }
1350  continue;
1351  }
1352 
1353  if (!Node->isMachineOpcode())
1354  continue;
1355  // If we're in the middle of scheduling a call, don't begin scheduling
1356  // another call. Also, don't allow any physical registers to be live across
1357  // the call.
1358  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1359  // Check the special calling-sequence resource.
1360  unsigned CallResource = TRI->getNumRegs();
1361  if (LiveRegDefs[CallResource]) {
1362  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1363  while (SDNode *Glued = Gen->getGluedNode())
1364  Gen = Glued;
1365  if (!IsChainDependent(Gen, Node, 0, TII) &&
1366  RegAdded.insert(CallResource).second)
1367  LRegs.push_back(CallResource);
1368  }
1369  }
1370  if (const uint32_t *RegMask = getNodeRegMask(Node))
1371  CheckForLiveRegDefMasked(SU, RegMask,
1372  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1373  RegAdded, LRegs);
1374 
1375  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1376  if (MCID.hasOptionalDef()) {
1377  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1378  // This operand can be either a def of CPSR, if the S bit is set; or a use
1379  // of %noreg. When the OptionalDef is set to a valid register, we need to
1380  // handle it in the same way as an ImplicitDef.
1381  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1382  if (MCID.OpInfo[i].isOptionalDef()) {
1383  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1384  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1385  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1386  }
1387  }
1388  if (!MCID.ImplicitDefs)
1389  continue;
1390  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1391  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1392  }
1393 
1394  return !LRegs.empty();
1395 }
1396 
1397 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1398  // Add the nodes that aren't ready back onto the available list.
1399  for (unsigned i = Interferences.size(); i > 0; --i) {
1400  SUnit *SU = Interferences[i-1];
1401  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1402  if (Reg) {
1403  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1404  if (!is_contained(LRegs, Reg))
1405  continue;
1406  }
1407  SU->isPending = false;
1408  // The interfering node may no longer be available due to backtracking.
1409  // Furthermore, it may have been made available again, in which case it is
1410  // now already in the AvailableQueue.
1411  if (SU->isAvailable && !SU->NodeQueueId) {
1412  DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1413  AvailableQueue->push(SU);
1414  }
1415  if (i < Interferences.size())
1416  Interferences[i-1] = Interferences.back();
1417  Interferences.pop_back();
1418  LRegsMap.erase(LRegsPos);
1419  }
1420 }
1421 
1422 /// Return a node that can be scheduled in this cycle. Requirements:
1423 /// (1) Ready: latency has been satisfied
1424 /// (2) No Hazards: resources are available
1425 /// (3) No Interferences: may unschedule to break register interferences.
1426 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1427  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1428  auto FindAvailableNode = [&]() {
1429  while (CurSU) {
1430  SmallVector<unsigned, 4> LRegs;
1431  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1432  break;
1433  DEBUG(dbgs() << " Interfering reg " <<
1434  (LRegs[0] == TRI->getNumRegs() ? "CallResource"
1435  : TRI->getName(LRegs[0]))
1436  << " SU #" << CurSU->NodeNum << '\n');
1437  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1438  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1439  if (LRegsPair.second) {
1440  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1441  Interferences.push_back(CurSU);
1442  }
1443  else {
1444  assert(CurSU->isPending && "Interferences are pending");
1445  // Update the interference with current live regs.
1446  LRegsPair.first->second = LRegs;
1447  }
1448  CurSU = AvailableQueue->pop();
1449  }
1450  };
1451  FindAvailableNode();
1452  if (CurSU)
1453  return CurSU;
1454 
1455  // All candidates are delayed due to live physical reg dependencies.
1456  // Try backtracking, code duplication, or inserting cross class copies
1457  // to resolve it.
1458  for (SUnit *TrySU : Interferences) {
1459  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1460 
1461  // Try unscheduling up to the point where it's safe to schedule
1462  // this node.
1463  SUnit *BtSU = nullptr;
1464  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1465  for (unsigned Reg : LRegs) {
1466  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1467  BtSU = LiveRegGens[Reg];
1468  LiveCycle = BtSU->getHeight();
1469  }
1470  }
1471  if (!WillCreateCycle(TrySU, BtSU)) {
1472  // BacktrackBottomUp mutates Interferences!
1473  BacktrackBottomUp(TrySU, BtSU);
1474 
1475  // Force the current node to be scheduled before the node that
1476  // requires the physical reg dep.
1477  if (BtSU->isAvailable) {
1478  BtSU->isAvailable = false;
1479  if (!BtSU->isPending)
1480  AvailableQueue->remove(BtSU);
1481  }
1482  DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1483  << TrySU->NodeNum << ")\n");
1484  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1485 
1486  // If one or more successors has been unscheduled, then the current
1487  // node is no longer available.
1488  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1489  DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1490  CurSU = AvailableQueue->pop();
1491  } else {
1492  DEBUG(dbgs() << "TrySU available\n");
1493  // Available and in AvailableQueue
1494  AvailableQueue->remove(TrySU);
1495  CurSU = TrySU;
1496  }
1497  FindAvailableNode();
1498  // Interferences has been mutated. We must break.
1499  break;
1500  }
1501  }
1502 
1503  if (!CurSU) {
1504  // Can't backtrack. If it's too expensive to copy the value, then try to
1505  // duplicate the nodes that produce these "too expensive to copy"
1506  // values to break the dependency. In case even that doesn't work,
1507  // insert cross class copies.
1508  // If it's not too expensive, i.e. cost != -1, issue copies.
1509  SUnit *TrySU = Interferences[0];
1510  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1511  assert(LRegs.size() == 1 && "Can't handle this yet!");
1512  unsigned Reg = LRegs[0];
1513  SUnit *LRDef = LiveRegDefs[Reg];
1514  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1515  const TargetRegisterClass *RC =
1516  TRI->getMinimalPhysRegClass(Reg, VT);
1517  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1518 
1519  // If the cross-copy register class is the same as RC, then it must be possible
1520  // to copy the value directly. Do not try to duplicate the def.
1521  // If the cross-copy register class is not the same as RC, then it is possible
1522  // to copy the value, but it requires cross-register-class copies and is
1523  // expensive.
1524  // If the cross-copy register class is null, then it is not possible to copy
1525  // the value at all.
1526  SUnit *NewDef = nullptr;
1527  if (DestRC != RC) {
1528  NewDef = CopyAndMoveSuccessors(LRDef);
1529  if (!DestRC && !NewDef)
1530  report_fatal_error("Can't handle live physical register dependency!");
1531  }
1532  if (!NewDef) {
1533  // Issue copies, these can be expensive cross register class copies.
1534  SmallVector<SUnit*, 2> Copies;
1535  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1536  DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1537  << " to SU #" << Copies.front()->NodeNum << "\n");
1538  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1539  NewDef = Copies.back();
1540  }
1541 
1542  DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1543  << " to SU #" << TrySU->NodeNum << "\n");
1544  LiveRegDefs[Reg] = NewDef;
1545  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1546  TrySU->isAvailable = false;
1547  CurSU = NewDef;
1548  }
1549  assert(CurSU && "Unable to resolve live physical register dependencies!");
1550  return CurSU;
1551 }
1552 
1553 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1554 /// schedulers.
1555 void ScheduleDAGRRList::ListScheduleBottomUp() {
1556  // Release any predecessors of the special Exit node.
1557  ReleasePredecessors(&ExitSU);
1558 
1559  // Add root to Available queue.
1560  if (!SUnits.empty()) {
1561  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1562  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1563  RootSU->isAvailable = true;
1564  AvailableQueue->push(RootSU);
1565  }
1566 
1567  // While Available queue is not empty, grab the node with the highest
1568  // priority. If it is not ready put it back. Schedule the node.
1569  Sequence.reserve(SUnits.size());
1570  while (!AvailableQueue->empty() || !Interferences.empty()) {
1571  DEBUG(dbgs() << "\nExamining Available:\n";
1572  AvailableQueue->dump(this));
1573 
1574  // Pick the best node to schedule taking all constraints into
1575  // consideration.
1576  SUnit *SU = PickNodeToScheduleBottomUp();
1577 
1578  AdvancePastStalls(SU);
1579 
1580  ScheduleNodeBottomUp(SU);
1581 
1582  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1583  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1584  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1585  "MinAvailableCycle uninitialized");
1586  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1587  }
1588  }
1589 
1590  // Reverse the order if it is bottom up.
1591  std::reverse(Sequence.begin(), Sequence.end());
1592 
1593 #ifndef NDEBUG
1594  VerifyScheduledSequence(/*isBottomUp=*/true);
1595 #endif
1596 }
1597 
1598 namespace {
1599 
1600 class RegReductionPQBase;
1601 
1602 struct queue_sort {
1603  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1604 };
1605 
1606 #ifndef NDEBUG
1607 template<class SF>
1608 struct reverse_sort : public queue_sort {
1609  SF &SortFunc;
1610 
1611  reverse_sort(SF &sf) : SortFunc(sf) {}
1612 
1613  bool operator()(SUnit* left, SUnit* right) const {
1614  // reverse left/right rather than simply !SortFunc(left, right)
1615  // to expose different paths in the comparison logic.
1616  return SortFunc(right, left);
1617  }
1618 };
1619 #endif // NDEBUG
1620 
1621 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1622 // reduction scheduler.
1623 struct bu_ls_rr_sort : public queue_sort {
1624  enum {
1625  IsBottomUp = true,
1626  HasReadyFilter = false
1627  };
1628 
1629  RegReductionPQBase *SPQ;
1630 
1631  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1632 
1633  bool operator()(SUnit* left, SUnit* right) const;
1634 };
1635 
1636 // src_ls_rr_sort - Priority function for source order scheduler.
1637 struct src_ls_rr_sort : public queue_sort {
1638  enum {
1639  IsBottomUp = true,
1640  HasReadyFilter = false
1641  };
1642 
1643  RegReductionPQBase *SPQ;
1644 
1645  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1646 
1647  bool operator()(SUnit* left, SUnit* right) const;
1648 };
1649 
1650 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1651 struct hybrid_ls_rr_sort : public queue_sort {
1652  enum {
1653  IsBottomUp = true,
1654  HasReadyFilter = false
1655  };
1656 
1657  RegReductionPQBase *SPQ;
1658 
1659  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1660 
1661  bool isReady(SUnit *SU, unsigned CurCycle) const;
1662 
1663  bool operator()(SUnit* left, SUnit* right) const;
1664 };
1665 
1666 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1667 // scheduler.
1668 struct ilp_ls_rr_sort : public queue_sort {
1669  enum {
1670  IsBottomUp = true,
1671  HasReadyFilter = false
1672  };
1673 
1674  RegReductionPQBase *SPQ;
1675 
1676  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1677 
1678  bool isReady(SUnit *SU, unsigned CurCycle) const;
1679 
1680  bool operator()(SUnit* left, SUnit* right) const;
1681 };
1682 
1683 class RegReductionPQBase : public SchedulingPriorityQueue {
1684 protected:
1685  std::vector<SUnit *> Queue;
1686  unsigned CurQueueId = 0;
1687  bool TracksRegPressure;
1688  bool SrcOrder;
1689 
1690  // SUnits - The SUnits for the current graph.
1691  std::vector<SUnit> *SUnits;
1692 
1693  MachineFunction &MF;
1694  const TargetInstrInfo *TII;
1695  const TargetRegisterInfo *TRI;
1696  const TargetLowering *TLI;
1697  ScheduleDAGRRList *scheduleDAG = nullptr;
1698 
1699  // SethiUllmanNumbers - The SethiUllman number for each node.
1700  std::vector<unsigned> SethiUllmanNumbers;
1701 
1702  /// RegPressure - Tracking current reg pressure per register class.
1703  std::vector<unsigned> RegPressure;
1704 
1705  /// RegLimit - Tracking the number of allocatable registers per register
1706  /// class.
1707  std::vector<unsigned> RegLimit;
1708 
1709 public:
1710  RegReductionPQBase(MachineFunction &mf,
1711  bool hasReadyFilter,
1712  bool tracksrp,
1713  bool srcorder,
1714  const TargetInstrInfo *tii,
1715  const TargetRegisterInfo *tri,
1716  const TargetLowering *tli)
1717  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1718  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1719  if (TracksRegPressure) {
1720  unsigned NumRC = TRI->getNumRegClasses();
1721  RegLimit.resize(NumRC);
1722  RegPressure.resize(NumRC);
1723  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1724  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1725  for (const TargetRegisterClass *RC : TRI->regclasses())
1726  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1727  }
1728  }
1729 
1730  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1731  scheduleDAG = scheduleDag;
1732  }
1733 
1734  ScheduleHazardRecognizer* getHazardRec() {
1735  return scheduleDAG->getHazardRec();
1736  }
1737 
1738  void initNodes(std::vector<SUnit> &sunits) override;
1739 
1740  void addNode(const SUnit *SU) override;
1741 
1742  void updateNode(const SUnit *SU) override;
1743 
1744  void releaseState() override {
1745  SUnits = nullptr;
1746  SethiUllmanNumbers.clear();
1747  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1748  }
1749 
1750  unsigned getNodePriority(const SUnit *SU) const;
1751 
1752  unsigned getNodeOrdering(const SUnit *SU) const {
1753  if (!SU->getNode()) return 0;
1754 
1755  return SU->getNode()->getIROrder();
1756  }
1757 
1758  bool empty() const override { return Queue.empty(); }
1759 
1760  void push(SUnit *U) override {
1761  assert(!U->NodeQueueId && "Node in the queue already");
1762  U->NodeQueueId = ++CurQueueId;
1763  Queue.push_back(U);
1764  }
1765 
1766  void remove(SUnit *SU) override {
1767  assert(!Queue.empty() && "Queue is empty!");
1768  assert(SU->NodeQueueId != 0 && "Not in queue!");
1769  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1770  if (I != std::prev(Queue.end()))
1771  std::swap(*I, Queue.back());
1772  Queue.pop_back();
1773  SU->NodeQueueId = 0;
1774  }
1775 
1776  bool tracksRegPressure() const override { return TracksRegPressure; }
1777 
1778  void dumpRegPressure() const;
1779 
1780  bool HighRegPressure(const SUnit *SU) const;
1781 
1782  bool MayReduceRegPressure(SUnit *SU) const;
1783 
1784  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1785 
1786  void scheduledNode(SUnit *SU) override;
1787 
1788  void unscheduledNode(SUnit *SU) override;
1789 
1790 protected:
1791  bool canClobber(const SUnit *SU, const SUnit *Op);
1792  void AddPseudoTwoAddrDeps();
1793  void PrescheduleNodesWithMultipleUses();
1794  void CalculateSethiUllmanNumbers();
1795 };
1796 
1797 template<class SF>
1798 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1799  std::vector<SUnit *>::iterator Best = Q.begin();
1800  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1801  if (Picker(*Best, *I))
1802  Best = I;
1803  SUnit *V = *Best;
1804  if (Best != std::prev(Q.end()))
1805  std::swap(*Best, Q.back());
1806  Q.pop_back();
1807  return V;
1808 }
1809 
1810 template<class SF>
1811 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1812 #ifndef NDEBUG
1813  if (DAG->StressSched) {
1814  reverse_sort<SF> RPicker(Picker);
1815  return popFromQueueImpl(Q, RPicker);
1816  }
1817 #endif
1818  (void)DAG;
1819  return popFromQueueImpl(Q, Picker);
1820 }
1821 
1822 //===----------------------------------------------------------------------===//
1823 // RegReductionPriorityQueue Definition
1824 //===----------------------------------------------------------------------===//
1825 //
1826 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1827 // to reduce register pressure.
1828 //
1829 template<class SF>
1830 class RegReductionPriorityQueue : public RegReductionPQBase {
1831  SF Picker;
1832 
1833 public:
1834  RegReductionPriorityQueue(MachineFunction &mf,
1835  bool tracksrp,
1836  bool srcorder,
1837  const TargetInstrInfo *tii,
1838  const TargetRegisterInfo *tri,
1839  const TargetLowering *tli)
1840  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1841  tii, tri, tli),
1842  Picker(this) {}
1843 
1844  bool isBottomUp() const override { return SF::IsBottomUp; }
1845 
1846  bool isReady(SUnit *U) const override {
1847  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1848  }
1849 
1850  SUnit *pop() override {
1851  if (Queue.empty()) return nullptr;
1852 
1853  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1854  V->NodeQueueId = 0;
1855  return V;
1856  }
1857 
1858 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1859  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1860  // Emulate pop() without clobbering NodeQueueIds.
1861  std::vector<SUnit *> DumpQueue = Queue;
1862  SF DumpPicker = Picker;
1863  while (!DumpQueue.empty()) {
1864  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1865  dbgs() << "Height " << SU->getHeight() << ": ";
1866  SU->dump(DAG);
1867  }
1868  }
1869 #endif
1870 };
1871 
1872 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1873 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1874 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1875 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1876 
1877 } // end anonymous namespace
1878 
1879 //===----------------------------------------------------------------------===//
1880 // Static Node Priority for Register Pressure Reduction
1881 //===----------------------------------------------------------------------===//
1882 
1883 // Check for special nodes that bypass scheduling heuristics.
1884 // Currently this pushes TokenFactor nodes down, but may be used for other
1885 // pseudo-ops as well.
1886 //
1887 // Return -1 to schedule right above left, 1 for left above right.
1888 // Return 0 if no bias exists.
1889 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1890  bool LSchedLow = left->isScheduleLow;
1891  bool RSchedLow = right->isScheduleLow;
1892  if (LSchedLow != RSchedLow)
1893  return LSchedLow < RSchedLow ? 1 : -1;
1894  return 0;
1895 }
1896 
1897 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1898 /// A smaller number means a higher priority.
1899 static unsigned
1900 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1901  if (SUNumbers[SU->NodeNum] != 0)
1902  return SUNumbers[SU->NodeNum];
1903 
1904  // Use WorkList to avoid stack overflow on excessively large IRs.
1905  struct WorkState {
1906  WorkState(const SUnit *SU) : SU(SU) {}
1907  const SUnit *SU;
1908  unsigned PredsProcessed = 0;
1909  };
1910 
1911  SmallVector<WorkState, 16> WorkList;
1912  WorkList.push_back(SU);
1913  while (!WorkList.empty()) {
1914  auto &Temp = WorkList.back();
1915  auto *TempSU = Temp.SU;
1916  bool AllPredsKnown = true;
1917  // Try to find a non-evaluated pred and push it into the processing stack.
1918  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1919  auto &Pred = TempSU->Preds[P];
1920  if (Pred.isCtrl()) continue; // ignore chain preds
1921  SUnit *PredSU = Pred.getSUnit();
1922  if (SUNumbers[PredSU->NodeNum] == 0) {
1923 #ifndef NDEBUG
1924  // In debug mode, check that we don't have such an element in the stack.
1925  for (auto It : WorkList)
1926  assert(It.SU != PredSU && "Trying to push an element twice?");
1927 #endif
1928  // Next time start processing this one starting from the next pred.
1929  Temp.PredsProcessed = P + 1;
1930  WorkList.push_back(PredSU);
1931  AllPredsKnown = false;
1932  break;
1933  }
1934  }
1935 
1936  if (!AllPredsKnown)
1937  continue;
1938 
1939  // Once all preds are known, we can calculate the answer for this one.
1940  unsigned SethiUllmanNumber = 0;
1941  unsigned Extra = 0;
1942  for (const SDep &Pred : TempSU->Preds) {
1943  if (Pred.isCtrl()) continue; // ignore chain preds
1944  SUnit *PredSU = Pred.getSUnit();
1945  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1946  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1947  if (PredSethiUllman > SethiUllmanNumber) {
1948  SethiUllmanNumber = PredSethiUllman;
1949  Extra = 0;
1950  } else if (PredSethiUllman == SethiUllmanNumber)
1951  ++Extra;
1952  }
1953 
1954  SethiUllmanNumber += Extra;
1955  if (SethiUllmanNumber == 0)
1956  SethiUllmanNumber = 1;
1957  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1958  WorkList.pop_back();
1959  }
1960 
1961  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1962  return SUNumbers[SU->NodeNum];
1963 }
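// Worked example (illustrative, not part of the original source): a leaf
// SUnit with no data preds gets 1. An SUnit whose data preds already carry
// Sethi-Ullman numbers {2, 2} takes the maximum (2) plus one for the tie,
// giving 3; with preds {3, 1} it simply takes 3, since only ties with the
// maximum add to the count. This follows the classic Sethi-Ullman rule that
// two equally expensive subtrees need one more register than either alone.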
1964 
1965 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1966 /// scheduling units.
1967 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1968  SethiUllmanNumbers.assign(SUnits->size(), 0);
1969 
1970  for (const SUnit &SU : *SUnits)
1971  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1972 }
1973 
1974 void RegReductionPQBase::addNode(const SUnit *SU) {
1975  unsigned SUSize = SethiUllmanNumbers.size();
1976  if (SUnits->size() > SUSize)
1977  SethiUllmanNumbers.resize(SUSize*2, 0);
1978  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1979 }
1980 
1981 void RegReductionPQBase::updateNode(const SUnit *SU) {
1982  SethiUllmanNumbers[SU->NodeNum] = 0;
1983  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1984 }
1985 
1986 // Lower priority means schedule further down. For bottom-up scheduling, lower
1987 // priority SUs are scheduled before higher priority SUs.
1988 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1989  assert(SU->NodeNum < SethiUllmanNumbers.size());
1990  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1991  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1992  // CopyToReg should be close to its uses to facilitate coalescing and
1993  // avoid spilling.
1994  return 0;
1995  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1996  Opc == TargetOpcode::SUBREG_TO_REG ||
1997  Opc == TargetOpcode::INSERT_SUBREG)
1998  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1999  // close to their uses to facilitate coalescing.
2000  return 0;
2001  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2002  // If SU does not have a register use, i.e. it doesn't produce a value
2003  // that would be consumed (e.g. store), then it terminates a chain of
 2004  // computation. Give it a large SethiUllman number so it will be
 2005  // scheduled right before its predecessors, so that it doesn't lengthen
 2006  // their live ranges.
2007  return 0xffff;
2008  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2009  // If SU does not have a register def, schedule it close to its uses
2010  // because it does not lengthen any live ranges.
2011  return 0;
2012 #if 1
2013  return SethiUllmanNumbers[SU->NodeNum];
2014 #else
2015  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2016  if (SU->isCallOp) {
2017  // FIXME: This assumes all of the defs are used as call operands.
2018  int NP = (int)Priority - SU->getNode()->getNumValues();
2019  return (NP > 0) ? NP : 0;
2020  }
2021  return Priority;
2022 #endif
2023 }
2024 
2025 //===----------------------------------------------------------------------===//
2026 // Register Pressure Tracking
2027 //===----------------------------------------------------------------------===//
2028 
2029 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2030 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2031  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2032  unsigned Id = RC->getID();
2033  unsigned RP = RegPressure[Id];
2034  if (!RP) continue;
2035  DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2036  << RegLimit[Id] << '\n');
2037  }
2038 }
2039 #endif
2040 
2041 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2042  if (!TLI)
2043  return false;
2044 
2045  for (const SDep &Pred : SU->Preds) {
2046  if (Pred.isCtrl())
2047  continue;
2048  SUnit *PredSU = Pred.getSUnit();
2049  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2050  // to cover the number of registers defined (they are all live).
2051  if (PredSU->NumRegDefsLeft == 0) {
2052  continue;
2053  }
2054  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2055  RegDefPos.IsValid(); RegDefPos.Advance()) {
2056  unsigned RCId, Cost;
2057  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2058 
2059  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2060  return true;
2061  }
2062  }
2063  return false;
2064 }
2065 
2066 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2067  const SDNode *N = SU->getNode();
2068 
2069  if (!N->isMachineOpcode() || !SU->NumSuccs)
2070  return false;
2071 
2072  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2073  for (unsigned i = 0; i != NumDefs; ++i) {
2074  MVT VT = N->getSimpleValueType(i);
2075  if (!N->hasAnyUseOfValue(i))
2076  continue;
2077  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2078  if (RegPressure[RCId] >= RegLimit[RCId])
2079  return true;
2080  }
2081  return false;
2082 }
2083 
2084 // Compute the register pressure contribution by this instruction by counting up
2085 // for uses that are not live and down for defs. Only count register classes
2086 // that are already under high pressure. As a side effect, compute the number of
2087 // uses of registers that are already live.
2088 //
2089 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2090 // so could probably be factored.
2091 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2092  LiveUses = 0;
2093  int PDiff = 0;
2094  for (const SDep &Pred : SU->Preds) {
2095  if (Pred.isCtrl())
2096  continue;
2097  SUnit *PredSU = Pred.getSUnit();
2098  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2099  // to cover the number of registers defined (they are all live).
2100  if (PredSU->NumRegDefsLeft == 0) {
2101  if (PredSU->getNode()->isMachineOpcode())
2102  ++LiveUses;
2103  continue;
2104  }
2105  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2106  RegDefPos.IsValid(); RegDefPos.Advance()) {
2107  MVT VT = RegDefPos.GetValue();
2108  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2109  if (RegPressure[RCId] >= RegLimit[RCId])
2110  ++PDiff;
2111  }
2112  }
2113  const SDNode *N = SU->getNode();
2114 
2115  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2116  return PDiff;
2117 
2118  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2119  for (unsigned i = 0; i != NumDefs; ++i) {
2120  MVT VT = N->getSimpleValueType(i);
2121  if (!N->hasAnyUseOfValue(i))
2122  continue;
2123  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2124  if (RegPressure[RCId] >= RegLimit[RCId])
2125  --PDiff;
2126  }
2127  return PDiff;
2128 }
2129 
2130 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2131  if (!TracksRegPressure)
2132  return;
2133 
2134  if (!SU->getNode())
2135  return;
2136 
2137  for (const SDep &Pred : SU->Preds) {
2138  if (Pred.isCtrl())
2139  continue;
2140  SUnit *PredSU = Pred.getSUnit();
2141  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2142  // to cover the number of registers defined (they are all live).
2143  if (PredSU->NumRegDefsLeft == 0) {
2144  continue;
2145  }
2146  // FIXME: The ScheduleDAG currently loses information about which of a
2147  // node's values is consumed by each dependence. Consequently, if the node
2148  // defines multiple register classes, we don't know which to pressurize
2149  // here. Instead the following loop consumes the register defs in an
2150  // arbitrary order. At least it handles the common case of clustered loads
2151  // to the same class. For precise liveness, each SDep needs to indicate the
2152  // result number. But that tightly couples the ScheduleDAG with the
2153  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2154  // value type or register class to SDep.
2155  //
2156  // The most important aspect of register tracking is balancing the increase
2157  // here with the reduction further below. Note that this SU may use multiple
 2158  // defs in PredSU. They can't be determined here, but we've already
2159  // compensated by reducing NumRegDefsLeft in PredSU during
2160  // ScheduleDAGSDNodes::AddSchedEdges.
2161  --PredSU->NumRegDefsLeft;
2162  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2163  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2164  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2165  if (SkipRegDefs)
2166  continue;
2167 
2168  unsigned RCId, Cost;
2169  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2170  RegPressure[RCId] += Cost;
2171  break;
2172  }
2173  }
2174 
2175  // We should have this assert, but there may be dead SDNodes that never
2176  // materialize as SUnits, so they don't appear to generate liveness.
2177  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2178  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2179  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2180  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2181  if (SkipRegDefs > 0)
2182  continue;
2183  unsigned RCId, Cost;
2184  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2185  if (RegPressure[RCId] < Cost) {
2186  // Register pressure tracking is imprecise. This can happen. But we try
2187  // hard not to let it happen because it likely results in poor scheduling.
2188  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
2189  RegPressure[RCId] = 0;
2190  }
2191  else {
2192  RegPressure[RCId] -= Cost;
2193  }
2194  }
2195  DEBUG(dumpRegPressure());
2196 }
2197 
2198 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2199  if (!TracksRegPressure)
2200  return;
2201 
2202  const SDNode *N = SU->getNode();
2203  if (!N) return;
2204 
2205  if (!N->isMachineOpcode()) {
2206  if (N->getOpcode() != ISD::CopyToReg)
2207  return;
2208  } else {
2209  unsigned Opc = N->getMachineOpcode();
2210  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2211  Opc == TargetOpcode::INSERT_SUBREG ||
2212  Opc == TargetOpcode::SUBREG_TO_REG ||
2213  Opc == TargetOpcode::REG_SEQUENCE ||
2214  Opc == TargetOpcode::IMPLICIT_DEF)
2215  return;
2216  }
2217 
2218  for (const SDep &Pred : SU->Preds) {
2219  if (Pred.isCtrl())
2220  continue;
2221  SUnit *PredSU = Pred.getSUnit();
2222  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2223  // counts data deps.
2224  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2225  continue;
2226  const SDNode *PN = PredSU->getNode();
2227  if (!PN->isMachineOpcode()) {
2228  if (PN->getOpcode() == ISD::CopyFromReg) {
2229  MVT VT = PN->getSimpleValueType(0);
2230  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2231  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2232  }
2233  continue;
2234  }
2235  unsigned POpc = PN->getMachineOpcode();
2236  if (POpc == TargetOpcode::IMPLICIT_DEF)
2237  continue;
2238  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2239  POpc == TargetOpcode::INSERT_SUBREG ||
2240  POpc == TargetOpcode::SUBREG_TO_REG) {
2241  MVT VT = PN->getSimpleValueType(0);
2242  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2243  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2244  continue;
2245  }
2246  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2247  for (unsigned i = 0; i != NumDefs; ++i) {
2248  MVT VT = PN->getSimpleValueType(i);
2249  if (!PN->hasAnyUseOfValue(i))
2250  continue;
2251  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2252  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2253  // Register pressure tracking is imprecise. This can happen.
2254  RegPressure[RCId] = 0;
2255  else
2256  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2257  }
2258  }
2259 
2260  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2261  // may transfer data dependencies to CopyToReg.
2262  if (SU->NumSuccs && N->isMachineOpcode()) {
2263  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2264  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2265  MVT VT = N->getSimpleValueType(i);
2266  if (VT == MVT::Glue || VT == MVT::Other)
2267  continue;
2268  if (!N->hasAnyUseOfValue(i))
2269  continue;
2270  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2271  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2272  }
2273  }
2274 
2275  DEBUG(dumpRegPressure());
2276 }
2277 
2278 //===----------------------------------------------------------------------===//
2279 // Dynamic Node Priority for Register Pressure Reduction
2280 //===----------------------------------------------------------------------===//
2281 
2282 /// closestSucc - Returns the scheduled cycle of the successor which is
2283 /// closest to the current cycle.
2284 static unsigned closestSucc(const SUnit *SU) {
2285  unsigned MaxHeight = 0;
2286  for (const SDep &Succ : SU->Succs) {
2287  if (Succ.isCtrl()) continue; // ignore chain succs
2288  unsigned Height = Succ.getSUnit()->getHeight();
 2289  // If there are a bunch of CopyToRegs stacked up, they should be considered
2290  // to be at the same position.
2291  if (Succ.getSUnit()->getNode() &&
2292  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2293  Height = closestSucc(Succ.getSUnit())+1;
2294  if (Height > MaxHeight)
2295  MaxHeight = Height;
2296  }
2297  return MaxHeight;
2298 }
2299 
2300 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2301 /// for scratch registers, i.e. number of data dependencies.
2302 static unsigned calcMaxScratches(const SUnit *SU) {
2303  unsigned Scratches = 0;
2304  for (const SDep &Pred : SU->Preds) {
2305  if (Pred.isCtrl()) continue; // ignore chain preds
2306  Scratches++;
2307  }
2308  return Scratches;
2309 }
2310 
2311 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2312 /// CopyFromReg from a virtual register.
2313 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2314  bool RetVal = false;
2315  for (const SDep &Pred : SU->Preds) {
2316  if (Pred.isCtrl()) continue;
2317  const SUnit *PredSU = Pred.getSUnit();
2318  if (PredSU->getNode() &&
2319  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2320  unsigned Reg =
 2321  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
 2322  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2323  RetVal = true;
2324  continue;
2325  }
2326  }
2327  return false;
2328  }
2329  return RetVal;
2330 }
2331 
2332 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2333 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2334 /// it has no other use. It should be scheduled closer to the terminator.
2335 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2336  bool RetVal = false;
2337  for (const SDep &Succ : SU->Succs) {
2338  if (Succ.isCtrl()) continue;
2339  const SUnit *SuccSU = Succ.getSUnit();
2340  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2341  unsigned Reg =
 2342  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
 2343  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2344  RetVal = true;
2345  continue;
2346  }
2347  }
2348  return false;
2349  }
2350  return RetVal;
2351 }
2352 
2353 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2354 // set isVRegCycle for its CopyFromReg operands.
2355 //
2356 // This is only relevant for single-block loops, in which case the VRegCycle
2357 // node is likely an induction variable in which the operand and target virtual
2358 // registers should be coalesced (e.g. pre/post increment values). Setting the
2359 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2360 // CopyFromReg so that this node becomes the virtual register "kill". This
2361 // avoids interference between the values live in and out of the block and
2362 // eliminates a copy inside the loop.
2363 static void initVRegCycle(SUnit *SU) {
 2364  if (DisableSchedVRegCycle)
 2365  return;
2366 
2367  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2368  return;
2369 
2370  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2371 
2372  SU->isVRegCycle = true;
2373 
2374  for (const SDep &Pred : SU->Preds) {
2375  if (Pred.isCtrl()) continue;
2376  Pred.getSUnit()->isVRegCycle = true;
2377  }
2378 }
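// Illustrative case (hypothetical names, not from the original source): in a
// single-block loop, an induction update such as
//   %iv.next = add %iv, 1
// reads %iv via CopyFromReg and feeds %iv.next to a CopyToReg that carries the
// value around the loop. The add then has only live-in operands and live-out
// uses, so it and its CopyFromReg operand get isVRegCycle set, encouraging the
// scheduler to place other users of %iv first so the add becomes %iv's kill.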
2379 
2380 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2381 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2382 static void resetVRegCycle(SUnit *SU) {
2383  if (!SU->isVRegCycle)
2384  return;
2385 
2386  for (const SDep &Pred : SU->Preds) {
2387  if (Pred.isCtrl()) continue; // ignore chain preds
2388  SUnit *PredSU = Pred.getSUnit();
2389  if (PredSU->isVRegCycle) {
2390  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2391  "VRegCycle def must be CopyFromReg");
2392  Pred.getSUnit()->isVRegCycle = false;
2393  }
2394  }
2395 }
2396 
2397 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2398 // means a node that defines the VRegCycle has not been scheduled yet.
2399 static bool hasVRegCycleUse(const SUnit *SU) {
2400  // If this SU also defines the VReg, don't hoist it as a "use".
2401  if (SU->isVRegCycle)
2402  return false;
2403 
2404  for (const SDep &Pred : SU->Preds) {
2405  if (Pred.isCtrl()) continue; // ignore chain preds
2406  if (Pred.getSUnit()->isVRegCycle &&
2407  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2408  DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2409  return true;
2410  }
2411  }
2412  return false;
2413 }
2414 
2415 // Check for either a dependence (latency) or resource (hazard) stall.
2416 //
2417 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2418 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2419  if ((int)SPQ->getCurCycle() < Height) return true;
 2420  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2421  != ScheduleHazardRecognizer::NoHazard)
 2422  return true;
2423  return false;
2424 }
2425 
2426 // Return -1 if left has higher priority, 1 if right has higher priority.
2427 // Return 0 if latency-based priority is equivalent.
2428 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2429  RegReductionPQBase *SPQ) {
2430  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2431  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2432  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2433  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2434  int LHeight = (int)left->getHeight() + LPenalty;
2435  int RHeight = (int)right->getHeight() + RPenalty;
2436 
2437  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2438  BUHasStall(left, LHeight, SPQ);
2439  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2440  BUHasStall(right, RHeight, SPQ);
2441 
 2442  // If scheduling one of the nodes will cause a pipeline stall, delay it.
 2443  // If scheduling both of the nodes will cause a pipeline stall, sort
 2444  // them according to their height.
2445  if (LStall) {
2446  if (!RStall)
2447  return 1;
2448  if (LHeight != RHeight)
2449  return LHeight > RHeight ? 1 : -1;
2450  } else if (RStall)
2451  return -1;
2452 
2453  // If either node is scheduling for latency, sort them by height/depth
2454  // and latency.
2455  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2456  right->SchedulingPref == Sched::ILP)) {
2457  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2458  // is enabled, grouping instructions by cycle, then its height is already
2459  // covered so only its depth matters. We also reach this point if both stall
2460  // but have the same height.
2461  if (!SPQ->getHazardRec()->isEnabled()) {
2462  if (LHeight != RHeight)
2463  return LHeight > RHeight ? 1 : -1;
2464  }
2465  int LDepth = left->getDepth() - LPenalty;
2466  int RDepth = right->getDepth() - RPenalty;
2467  if (LDepth != RDepth) {
2468  DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2469  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2470  << ") depth " << RDepth << "\n");
2471  return LDepth < RDepth ? 1 : -1;
2472  }
2473  if (left->Latency != right->Latency)
2474  return left->Latency > right->Latency ? 1 : -1;
2475  }
2476  return 0;
2477 }
2478 
2479 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2480  // Schedule physical register definitions close to their use. This is
2481  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2482  // long as shortening physreg live ranges is generally good, we can defer
2483  // creating a subtarget hook.
2484  if (!DisableSchedPhysRegJoin) {
2485  bool LHasPhysReg = left->hasPhysRegDefs;
2486  bool RHasPhysReg = right->hasPhysRegDefs;
2487  if (LHasPhysReg != RHasPhysReg) {
2488  #ifndef NDEBUG
2489  static const char *const PhysRegMsg[] = { " has no physreg",
2490  " defines a physreg" };
2491  #endif
2492  DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2493  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2494  << PhysRegMsg[RHasPhysReg] << "\n");
2495  return LHasPhysReg < RHasPhysReg;
2496  }
2497  }
2498 
 2499  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2500  unsigned LPriority = SPQ->getNodePriority(left);
2501  unsigned RPriority = SPQ->getNodePriority(right);
2502 
2503  // Be really careful about hoisting call operands above previous calls.
 2504  // Only allow it if it would reduce register pressure.
2505  if (left->isCall && right->isCallOp) {
2506  unsigned RNumVals = right->getNode()->getNumValues();
2507  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2508  }
2509  if (right->isCall && left->isCallOp) {
2510  unsigned LNumVals = left->getNode()->getNumValues();
2511  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2512  }
2513 
2514  if (LPriority != RPriority)
2515  return LPriority > RPriority;
2516 
 2517  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
 2518  // the same, then keep source order.
2519  if (left->isCall || right->isCall) {
2520  unsigned LOrder = SPQ->getNodeOrdering(left);
2521  unsigned ROrder = SPQ->getNodeOrdering(right);
2522 
2523  // Prefer an ordering where the lower the non-zero order number, the higher
2524  // the preference.
2525  if ((LOrder || ROrder) && LOrder != ROrder)
2526  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2527  }
2528 
 2529  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2530  // e.g.
2531  // t1 = op t2, c1
2532  // t3 = op t4, c2
2533  //
2534  // and the following instructions are both ready.
2535  // t2 = op c3
2536  // t4 = op c4
2537  //
2538  // Then schedule t2 = op first.
2539  // i.e.
2540  // t4 = op c4
2541  // t2 = op c3
2542  // t1 = op t2, c1
2543  // t3 = op t4, c2
2544  //
2545  // This creates more short live intervals.
2546  unsigned LDist = closestSucc(left);
2547  unsigned RDist = closestSucc(right);
2548  if (LDist != RDist)
2549  return LDist < RDist;
2550 
 2551  // How many registers become live when the node is scheduled.
2552  unsigned LScratch = calcMaxScratches(left);
2553  unsigned RScratch = calcMaxScratches(right);
2554  if (LScratch != RScratch)
2555  return LScratch > RScratch;
2556 
2557  // Comparing latency against a call makes little sense unless the node
2558  // is register pressure-neutral.
2559  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2560  return (left->NodeQueueId > right->NodeQueueId);
2561 
2562  // Do not compare latencies when one or both of the nodes are calls.
2563  if (!DisableSchedCycles &&
2564  !(left->isCall || right->isCall)) {
2565  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2566  if (result != 0)
2567  return result > 0;
2568  }
2569  else {
2570  if (left->getHeight() != right->getHeight())
2571  return left->getHeight() > right->getHeight();
2572 
2573  if (left->getDepth() != right->getDepth())
2574  return left->getDepth() < right->getDepth();
2575  }
2576 
2577  assert(left->NodeQueueId && right->NodeQueueId &&
2578  "NodeQueueId cannot be zero");
2579  return (left->NodeQueueId > right->NodeQueueId);
2580 }
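// For reference, the tie-breaking order applied above is: physreg defs first
// (unless DisableSchedPhysRegJoin), then Sethi-Ullman priority (discounted for
// call operands), source order when calls are involved, distance to the
// closest successor, the scratch-register estimate, latency (or height/depth
// when cycle-level precision is disabled or calls are present), and finally
// NodeQueueId as a deterministic last resort.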
2581 
2582 // Bottom up
2583 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2584  if (int res = checkSpecialNodes(left, right))
2585  return res > 0;
2586 
2587  return BURRSort(left, right, SPQ);
2588 }
2589 
2590 // Source order, otherwise bottom up.
2591 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2592  if (int res = checkSpecialNodes(left, right))
2593  return res > 0;
2594 
2595  unsigned LOrder = SPQ->getNodeOrdering(left);
2596  unsigned ROrder = SPQ->getNodeOrdering(right);
2597 
2598  // Prefer an ordering where the lower the non-zero order number, the higher
2599  // the preference.
2600  if ((LOrder || ROrder) && LOrder != ROrder)
2601  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2602 
2603  return BURRSort(left, right, SPQ);
2604 }
2605 
2606 // If the time between now and when the instruction will be ready can cover
2607 // the spill code, then avoid adding it to the ready queue. This gives long
2608 // stalls highest priority and allows hoisting across calls. It should also
2609 // speed up processing the available queue.
2610 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2611  static const unsigned ReadyDelay = 3;
2612 
2613  if (SPQ->MayReduceRegPressure(SU)) return true;
2614 
2615  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2616 
 2617  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
 2618  != ScheduleHazardRecognizer::NoHazard)
 2619  return false;
2620 
2621  return true;
2622 }
2623 
2624 // Return true if right should be scheduled with higher priority than left.
2625 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2626  if (int res = checkSpecialNodes(left, right))
2627  return res > 0;
2628 
2629  if (left->isCall || right->isCall)
2630  // No way to compute latency of calls.
2631  return BURRSort(left, right, SPQ);
2632 
2633  bool LHigh = SPQ->HighRegPressure(left);
2634  bool RHigh = SPQ->HighRegPressure(right);
2635  // Avoid causing spills. If register pressure is high, schedule for
2636  // register pressure reduction.
2637  if (LHigh && !RHigh) {
2638  DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2639  << right->NodeNum << ")\n");
2640  return true;
2641  }
2642  else if (!LHigh && RHigh) {
2643  DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2644  << left->NodeNum << ")\n");
2645  return false;
2646  }
2647  if (!LHigh && !RHigh) {
2648  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2649  if (result != 0)
2650  return result > 0;
2651  }
2652  return BURRSort(left, right, SPQ);
2653 }
2654 
2655 // Schedule as many instructions in each cycle as possible. So don't make an
2656 // instruction available unless it is ready in the current cycle.
2657 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2658  if (SU->getHeight() > CurCycle) return false;
2659 
 2660  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2661  != ScheduleHazardRecognizer::NoHazard)
 2662  return false;
2663 
2664  return true;
2665 }
2666 
2667 static bool canEnableCoalescing(SUnit *SU) {
2668  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2669  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2670  // CopyToReg should be close to its uses to facilitate coalescing and
2671  // avoid spilling.
2672  return true;
2673 
2674  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2675  Opc == TargetOpcode::SUBREG_TO_REG ||
2676  Opc == TargetOpcode::INSERT_SUBREG)
2677  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2678  // close to their uses to facilitate coalescing.
2679  return true;
2680 
2681  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2682  // If SU does not have a register def, schedule it close to its uses
2683  // because it does not lengthen any live ranges.
2684  return true;
2685 
2686  return false;
2687 }
2688 
2689 // list-ilp is currently an experimental scheduler that allows various
2690 // heuristics to be enabled prior to the normal register reduction logic.
2691 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2692  if (int res = checkSpecialNodes(left, right))
2693  return res > 0;
2694 
2695  if (left->isCall || right->isCall)
2696  // No way to compute latency of calls.
2697  return BURRSort(left, right, SPQ);
2698 
2699  unsigned LLiveUses = 0, RLiveUses = 0;
 2700  int LPDiff = 0, RPDiff = 0;
 2701  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
 2702  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2703  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2704  }
2705  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2706  DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2707  << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2708  return LPDiff > RPDiff;
2709  }
2710 
2711  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2712  bool LReduce = canEnableCoalescing(left);
2713  bool RReduce = canEnableCoalescing(right);
2714  if (LReduce && !RReduce) return false;
2715  if (RReduce && !LReduce) return true;
2716  }
2717 
2718  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2719  DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2720  << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2721  return LLiveUses < RLiveUses;
2722  }
2723 
2724  if (!DisableSchedStalls) {
2725  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2726  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2727  if (LStall != RStall)
2728  return left->getHeight() > right->getHeight();
2729  }
2730 
2731  if (!DisableSchedCriticalPath) {
2732  int spread = (int)left->getDepth() - (int)right->getDepth();
2733  if (std::abs(spread) > MaxReorderWindow) {
2734  DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2735  << left->getDepth() << " != SU(" << right->NodeNum << "): "
2736  << right->getDepth() << "\n");
2737  return left->getDepth() < right->getDepth();
2738  }
2739  }
2740 
2741  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2742  int spread = (int)left->getHeight() - (int)right->getHeight();
2743  if (std::abs(spread) > MaxReorderWindow)
2744  return left->getHeight() > right->getHeight();
2745  }
2746 
2747  return BURRSort(left, right, SPQ);
2748 }
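// The list-ilp heuristics above are consulted in order, each gated by its
// cl::opt flag: register pressure difference, coalescing-enabling nodes when
// either side increases pressure, count of already-live uses, stall avoidance,
// critical-path depth and height (only when the spread exceeds
// MaxReorderWindow), and finally the common BURRSort comparison.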
2749 
2750 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2751  SUnits = &sunits;
2752  // Add pseudo dependency edges for two-address nodes.
2753  if (!Disable2AddrHack)
2754  AddPseudoTwoAddrDeps();
2755  // Reroute edges to nodes with multiple uses.
2756  if (!TracksRegPressure && !SrcOrder)
2757  PrescheduleNodesWithMultipleUses();
2758  // Calculate node priorities.
2759  CalculateSethiUllmanNumbers();
2760 
2761  // For single block loops, mark nodes that look like canonical IV increments.
2762  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2763  for (SUnit &SU : sunits)
2764  initVRegCycle(&SU);
2765 }
2766 
2767 //===----------------------------------------------------------------------===//
2768 // Preschedule for Register Pressure
2769 //===----------------------------------------------------------------------===//
2770 
2771 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2772  if (SU->isTwoAddress) {
2773  unsigned Opc = SU->getNode()->getMachineOpcode();
2774  const MCInstrDesc &MCID = TII->get(Opc);
2775  unsigned NumRes = MCID.getNumDefs();
2776  unsigned NumOps = MCID.getNumOperands() - NumRes;
2777  for (unsigned i = 0; i != NumOps; ++i) {
2778  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2779  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2780  if (DU->getNodeId() != -1 &&
2781  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2782  return true;
2783  }
2784  }
2785  }
2786  return false;
2787 }
2788 
2789 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2790 /// successor's explicit physregs whose definition can reach DepSU.
2791 /// i.e. DepSU should not be scheduled above SU.
2792 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2793  ScheduleDAGRRList *scheduleDAG,
2794  const TargetInstrInfo *TII,
2795  const TargetRegisterInfo *TRI) {
2796  const MCPhysReg *ImpDefs
2797  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2798  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2799  if(!ImpDefs && !RegMask)
2800  return false;
2801 
2802  for (const SDep &Succ : SU->Succs) {
2803  SUnit *SuccSU = Succ.getSUnit();
2804  for (const SDep &SuccPred : SuccSU->Preds) {
2805  if (!SuccPred.isAssignedRegDep())
2806  continue;
2807 
2808  if (RegMask &&
2809  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2810  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2811  return true;
2812 
2813  if (ImpDefs)
2814  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2815  // Return true if SU clobbers this physical register use and the
2816  // definition of the register reaches from DepSU. IsReachable queries
2817  // a topological forward sort of the DAG (following the successors).
2818  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2819  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2820  return true;
2821  }
2822  }
2823  return false;
2824 }
2825 
2826 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2827 /// physical register defs.
2828 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2829  const TargetInstrInfo *TII,
2830  const TargetRegisterInfo *TRI) {
2831  SDNode *N = SuccSU->getNode();
2832  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2833  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2834  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2835  for (const SDNode *SUNode = SU->getNode(); SUNode;
2836  SUNode = SUNode->getGluedNode()) {
2837  if (!SUNode->isMachineOpcode())
2838  continue;
2839  const MCPhysReg *SUImpDefs =
2840  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2841  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2842  if (!SUImpDefs && !SURegMask)
2843  continue;
2844  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2845  MVT VT = N->getSimpleValueType(i);
2846  if (VT == MVT::Glue || VT == MVT::Other)
2847  continue;
2848  if (!N->hasAnyUseOfValue(i))
2849  continue;
2850  unsigned Reg = ImpDefs[i - NumDefs];
2851  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2852  return true;
2853  if (!SUImpDefs)
2854  continue;
2855  for (;*SUImpDefs; ++SUImpDefs) {
2856  unsigned SUReg = *SUImpDefs;
2857  if (TRI->regsOverlap(Reg, SUReg))
2858  return true;
2859  }
2860  }
2861  }
2862  return false;
2863 }
2864 
2865 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2866 /// are not handled well by the general register pressure reduction
2867 /// heuristics. When presented with code like this:
2868 ///
2869 /// N
2870 /// / |
2871 /// / |
2872 /// U store
2873 /// |
2874 /// ...
2875 ///
2876 /// the heuristics tend to push the store up, but since the
2877 /// operand of the store has another use (U), this would increase
2878 /// the length of that other use (the U->N edge).
2879 ///
2880 /// This function transforms code like the above to route U's
2881 /// dependence through the store when possible, like this:
2882 ///
2883 /// N
2884 /// ||
2885 /// ||
2886 /// store
2887 /// |
2888 /// U
2889 /// |
2890 /// ...
2891 ///
2892 /// This results in the store being scheduled immediately
2893 /// after N, which shortens the U->N live range, reducing
2894 /// register pressure.
2895 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2896  // Visit all the nodes in topological order, working top-down.
2897  for (SUnit &SU : *SUnits) {
2898  // For now, only look at nodes with no data successors, such as stores.
2899  // These are especially important, due to the heuristics in
2900  // getNodePriority for nodes with no data successors.
2901  if (SU.NumSuccs != 0)
2902  continue;
2903  // For now, only look at nodes with exactly one data predecessor.
2904  if (SU.NumPreds != 1)
2905  continue;
2906  // Avoid prescheduling copies to virtual registers, which don't behave
2907  // like other nodes from the perspective of scheduling heuristics.
2908  if (SDNode *N = SU.getNode())
 2909  if (N->getOpcode() == ISD::CopyToReg &&
 2910  TargetRegisterInfo::isVirtualRegister
 2911  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2912  continue;
2913 
2914  // Locate the single data predecessor.
2915  SUnit *PredSU = nullptr;
2916  for (const SDep &Pred : SU.Preds)
2917  if (!Pred.isCtrl()) {
2918  PredSU = Pred.getSUnit();
2919  break;
2920  }
2921  assert(PredSU);
2922 
2923  // Don't rewrite edges that carry physregs, because that requires additional
2924  // support infrastructure.
2925  if (PredSU->hasPhysRegDefs)
2926  continue;
2927  // Short-circuit the case where SU is PredSU's only data successor.
2928  if (PredSU->NumSuccs == 1)
2929  continue;
2930  // Avoid prescheduling to copies from virtual registers, which don't behave
2931  // like other nodes from the perspective of scheduling heuristics.
2932  if (SDNode *N = SU.getNode())
 2933  if (N->getOpcode() == ISD::CopyFromReg &&
 2934  TargetRegisterInfo::isVirtualRegister
 2935  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2936  continue;
2937 
2938  // Perform checks on the successors of PredSU.
2939  for (const SDep &PredSucc : PredSU->Succs) {
2940  SUnit *PredSuccSU = PredSucc.getSUnit();
2941  if (PredSuccSU == &SU) continue;
2942  // If PredSU has another successor with no data successors, for
2943  // now don't attempt to choose either over the other.
2944  if (PredSuccSU->NumSuccs == 0)
2945  goto outer_loop_continue;
2946  // Don't break physical register dependencies.
2947  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2948  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2949  goto outer_loop_continue;
2950  // Don't introduce graph cycles.
2951  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2952  goto outer_loop_continue;
2953  }
2954 
2955  // Ok, the transformation is safe and the heuristics suggest it is
2956  // profitable. Update the graph.
2957  DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
2958  << " next to PredSU #" << PredSU->NodeNum
2959  << " to guide scheduling in the presence of multiple uses\n");
2960  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2961  SDep Edge = PredSU->Succs[i];
2962  assert(!Edge.isAssignedRegDep());
2963  SUnit *SuccSU = Edge.getSUnit();
2964  if (SuccSU != &SU) {
2965  Edge.setSUnit(PredSU);
2966  scheduleDAG->RemovePred(SuccSU, Edge);
2967  scheduleDAG->AddPred(&SU, Edge);
2968  Edge.setSUnit(&SU);
2969  scheduleDAG->AddPred(SuccSU, Edge);
2970  --i;
2971  }
2972  }
2973  outer_loop_continue:;
2974  }
2975 }
2976 
2977 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2978 /// it as a def&use operand, add a pseudo control edge from it to the other
2979 /// node (if it won't create a cycle) so the two-address one will be scheduled
2980 /// first (lower in the schedule). If both nodes are two-address, favor the
2981 /// one that has a CopyToReg use (more likely to be a loop induction update).
2982 /// If both are two-address, but one is commutable while the other is not
2983 /// commutable, favor the one that's not commutable.
2984 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2985  for (SUnit &SU : *SUnits) {
2986  if (!SU.isTwoAddress)
2987  continue;
2988 
2989  SDNode *Node = SU.getNode();
2990  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
2991  continue;
2992 
2993  bool isLiveOut = hasOnlyLiveOutUses(&SU);
2994  unsigned Opc = Node->getMachineOpcode();
2995  const MCInstrDesc &MCID = TII->get(Opc);
2996  unsigned NumRes = MCID.getNumDefs();
2997  unsigned NumOps = MCID.getNumOperands() - NumRes;
2998  for (unsigned j = 0; j != NumOps; ++j) {
2999  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3000  continue;
3001  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3002  if (DU->getNodeId() == -1)
3003  continue;
3004  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3005  if (!DUSU)
3006  continue;
3007  for (const SDep &Succ : DUSU->Succs) {
3008  if (Succ.isCtrl())
3009  continue;
3010  SUnit *SuccSU = Succ.getSUnit();
3011  if (SuccSU == &SU)
3012  continue;
3013  // Be conservative. Ignore if nodes aren't at roughly the same
3014  // depth and height.
3015  if (SuccSU->getHeight() < SU.getHeight() &&
3016  (SU.getHeight() - SuccSU->getHeight()) > 1)
3017  continue;
3018  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3019  // constrains whatever is using the copy, instead of the copy
3020  // itself. In the case that the copy is coalesced, this
 3021  // preserves the intent of the pseudo two-address heuristics.
3022  while (SuccSU->Succs.size() == 1 &&
3023  SuccSU->getNode()->isMachineOpcode() &&
3024  SuccSU->getNode()->getMachineOpcode() ==
3025  TargetOpcode::COPY_TO_REGCLASS)
3026  SuccSU = SuccSU->Succs.front().getSUnit();
3027  // Don't constrain non-instruction nodes.
3028  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3029  continue;
3030  // Don't constrain nodes with physical register defs if the
3031  // predecessor can clobber them.
3032  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3033  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3034  continue;
3035  }
3036  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3037  // these may be coalesced away. We want them close to their uses.
3038  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3039  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3040  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3041  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3042  continue;
3043  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3044  (!canClobber(SuccSU, DUSU) ||
3045  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3046  (!SU.isCommutable && SuccSU->isCommutable)) &&
3047  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3048  DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
3049  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3050  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3051  }
3052  }
3053  }
3054  }
3055 }
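// Hypothetical illustration (names not from the source): if value a feeds both
// a two-address node t = op a, b (with t tied to a's register) and another
// user u, then emitting t before u would clobber a's register while u still
// needs it, forcing a copy. The artificial edge added above makes u a
// predecessor of t, so t ends up after u in the final order and can safely
// reuse a's register.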
3056 
3057 //===----------------------------------------------------------------------===//
3058 // Public Constructor Functions
3059 //===----------------------------------------------------------------------===//
3060 
3061 ScheduleDAGSDNodes *
3062 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
 3063  CodeGenOpt::Level OptLevel) {
3064  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3065  const TargetInstrInfo *TII = STI.getInstrInfo();
3066  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3067 
3068  BURegReductionPriorityQueue *PQ =
3069  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3070  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3071  PQ->setScheduleDAG(SD);
3072  return SD;
3073 }
3074 
3075 ScheduleDAGSDNodes *
3076 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
 3077  CodeGenOpt::Level OptLevel) {
3078  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3079  const TargetInstrInfo *TII = STI.getInstrInfo();
3080  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3081 
3082  SrcRegReductionPriorityQueue *PQ =
3083  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3084  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3085  PQ->setScheduleDAG(SD);
3086  return SD;
3087 }
3088 
3089 ScheduleDAGSDNodes *
3090 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
 3091  CodeGenOpt::Level OptLevel) {
3092  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3093  const TargetInstrInfo *TII = STI.getInstrInfo();
3094  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3095  const TargetLowering *TLI = IS->TLI;
3096 
3097  HybridBURRPriorityQueue *PQ =
3098  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3099 
3100  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3101  PQ->setScheduleDAG(SD);
3102  return SD;
3103 }
3104 
3105 ScheduleDAGSDNodes *
3106 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
 3107  CodeGenOpt::Level OptLevel) {
3108  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3109  const TargetInstrInfo *TII = STI.getInstrInfo();
3110  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3111  const TargetLowering *TLI = IS->TLI;
3112 
3113  ILPBURRPriorityQueue *PQ =
3114  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3115  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3116  PQ->setScheduleDAG(SD);
3117  return SD;
3118 }
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static bool hasVRegCycleUse(const SUnit *SU)
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:307
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:547
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
MachineFunction * MF
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:289
This interface is used to plug different priorities computation algorithms into the list scheduler...
Definition: ScheduleDAG.h:506
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:554
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:274
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
virtual void releaseState()=0
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
const TargetLowering * TLI
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the &#39;representative&#39; register class for the specified value type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
Reg
All possible values of the reg field in the ModR/M byte.
iterator_range< regclass_iterator > regclasses() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:556
static bool isRegDefEarlyClobberKind(unsigned Flag)
Definition: InlineAsm.h:278
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:634
unsigned getNumRegClasses() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:186
SUnit * OrigNode
If not this, the node from which this node was cloned.
Definition: ScheduleDAG.h:255
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler...
bool isPending
True once pending.
Definition: ScheduleDAG.h:287
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:162
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
static bool isRegDefKind(unsigned Flag)
Definition: InlineAsm.h:275
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
TargetInstrInfo - Interface to description of machine instruction set.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit *> LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask, and add them to LRegs.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:801
Scheduling dependency.
Definition: ScheduleDAG.h:50
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
void setHeightToAtLeast(unsigned NewHeight)
If NewDepth is greater than this node&#39;s depth value, set it to be the new height value.
bool isAvailable()
Definition: Compression.cpp:58
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
const MCPhysReg * getImplicitDefs() const
Return a list of registers that are potentially written by any instance of this machine instruction...
Definition: MCInstrDesc.h:534
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
Machine Value Type.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle...
bool isOptionalDef() const
Set if this operand is a optional def.
Definition: MCInstrDesc.h:99
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:238
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
virtual void addNode(const SUnit *SU)=0
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
virtual bool empty() const =0
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
iterator_range< value_op_iterator > op_values() const
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
const SDValue & getOperand(unsigned Num) const
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
static bool isOperandOf(const SUnit *SU, SDNode *N)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:173
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag...
Definition: InlineAsm.h:336
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:279
unsigned getCallFrameSetupOpcode() const
These methods return the opcode of the frame setup/destroy instructions if they exist (-1 otherwise)...
MCRegAliasIterator enumerates all registers aliasing Reg.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU&#39;s physical register defs...
static void resetVRegCycle(SUnit *SU)
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle...
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:295
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
RegDefIter - In place iteration over the values defined by an SUnit.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:639
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:187
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:788
virtual void remove(SUnit *SU)=0
bool regsOverlap(unsigned regA, unsigned regB) const
Returns true if the two registers are equal or alias each other.
static void initVRegCycle(SUnit *SU)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:309
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:552
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:281
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
TargetSubtargetInfo - Generic base class for all target subtargets.
int getNodeId() const
Return the unique node id.
bool isAvailable
True once available.
Definition: ScheduleDAG.h:288
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:270
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:411
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
unsigned short NumRegDefsLeft
of reg defs with no scheduled use.
Definition: ScheduleDAG.h:277
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII)
getPhysicalRegisterVT - Returns the ValueType of the physical register definition of the specified no...
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:530
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
#define DEBUG(X)
Definition: Debug.h:118
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:291
void dumpAll(const ScheduleDAG *G) const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:286
virtual SUnit * pop()=0
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:444
virtual HazardType getHazardType(SUnit *m, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
This file describes how to lower LLVM code to machine code.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:821
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.