ScheduleDAGRRList.cpp
1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "ScheduleDAGSDNodes.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/CodeGen/ISDOpcodes.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/ScheduleDAG.h"
30 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
31 #include "llvm/CodeGen/SchedulerRegistry.h"
32 #include "llvm/CodeGen/SelectionDAGISel.h"
33 #include "llvm/CodeGen/SelectionDAGNodes.h"
34 #include "llvm/CodeGen/TargetInstrInfo.h"
35 #include "llvm/CodeGen/TargetLowering.h"
36 #include "llvm/CodeGen/TargetOpcodes.h"
37 #include "llvm/CodeGen/TargetRegisterInfo.h"
38 #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 #include "llvm/Config/llvm-config.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/MC/MCInstrDesc.h"
42 #include "llvm/MC/MCRegisterInfo.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/CodeGen.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <cstdlib>
55 #include <iterator>
56 #include <limits>
57 #include <memory>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "pre-RA-sched"
64 
65 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
66 STATISTIC(NumUnfolds, "Number of nodes unfolded");
67 STATISTIC(NumDups, "Number of duplicated nodes");
68 STATISTIC(NumPRCopies, "Number of physical register copies");
69 
70 static RegisterScheduler
71  burrListDAGScheduler("list-burr",
72  "Bottom-up register reduction list scheduling",
73  createBURRListDAGScheduler);
74 
75 static RegisterScheduler
76  sourceListDAGScheduler("source",
77  "Similar to list-burr but schedules in source "
78  "order when possible",
79  createSourceListDAGScheduler);
80 
81 static RegisterScheduler
82  hybridListDAGScheduler("list-hybrid",
83  "Bottom-up register pressure aware list scheduling "
84  "which tries to balance latency and register pressure",
85  createHybridListDAGScheduler);
86 
87 static RegisterScheduler
88  ILPListDAGScheduler("list-ilp",
89  "Bottom-up register pressure aware list scheduling "
90  "which tries to balance ILP and register pressure",
91  createILPListDAGScheduler);
92 
93 static cl::opt<bool> DisableSchedCycles(
94  "disable-sched-cycles", cl::Hidden, cl::init(false),
95  cl::desc("Disable cycle-level precision during preRA scheduling"));
96 
97 // Temporary sched=list-ilp flags until the heuristics are robust.
98 // Some options are also available under sched=list-hybrid.
99 static cl::opt<bool> DisableSchedRegPressure(
100  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
101  cl::desc("Disable regpressure priority in sched=list-ilp"));
102 static cl::opt<bool> DisableSchedLiveUses(
103  "disable-sched-live-uses", cl::Hidden, cl::init(true),
104  cl::desc("Disable live use priority in sched=list-ilp"));
105 static cl::opt<bool> DisableSchedVRegCycle(
106  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
107  cl::desc("Disable virtual register cycle interference checks"));
108 static cl::opt<bool> DisableSchedPhysRegJoin(
109  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
110  cl::desc("Disable physreg def-use affinity"));
111 static cl::opt<bool> DisableSchedStalls(
112  "disable-sched-stalls", cl::Hidden, cl::init(true),
113  cl::desc("Disable no-stall priority in sched=list-ilp"));
114 static cl::opt<bool> DisableSchedCriticalPath(
115  "disable-sched-critical-path", cl::Hidden, cl::init(false),
116  cl::desc("Disable critical path priority in sched=list-ilp"));
117 static cl::opt<bool> DisableSchedHeight(
118  "disable-sched-height", cl::Hidden, cl::init(false),
119  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 static cl::opt<bool> Disable2AddrHack(
121  "disable-2addr-hack", cl::Hidden, cl::init(true),
122  cl::desc("Disable scheduler's two-address hack"));
123 
124 static cl::opt<int> MaxReorderWindow(
125  "max-sched-reorder", cl::Hidden, cl::init(6),
126  cl::desc("Number of instructions to allow ahead of the critical path "
127  "in sched=list-ilp"));
128 
129 static cl::opt<unsigned> AvgIPC(
130  "sched-avg-ipc", cl::Hidden, cl::init(1),
131  cl::desc("Average inst/cycle when no target itinerary exists."));
132 
133 namespace {
134 
135 //===----------------------------------------------------------------------===//
136 /// ScheduleDAGRRList - The actual register reduction list scheduler
137 /// implementation. This supports both top-down and bottom-up scheduling.
138 ///
139 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
140 private:
141  /// NeedLatency - True if the scheduler will make use of latency information.
142  bool NeedLatency;
143 
144  /// AvailableQueue - The priority queue to use for the available SUnits.
145  SchedulingPriorityQueue *AvailableQueue;
146 
147  /// PendingQueue - This contains all of the instructions whose operands have
148  /// been issued, but their results are not ready yet (due to the latency of
149  /// the operation). Once the operands become available, the instruction is
150  /// added to the AvailableQueue.
151  std::vector<SUnit *> PendingQueue;
152 
153  /// HazardRec - The hazard recognizer to use.
154  ScheduleHazardRecognizer *HazardRec;
155 
156  /// CurCycle - The current scheduler state corresponds to this cycle.
157  unsigned CurCycle = 0;
158 
159  /// MinAvailableCycle - Cycle of the soonest available instruction.
160  unsigned MinAvailableCycle;
161 
162  /// IssueCount - Count instructions issued in this cycle
163  /// Currently valid only for bottom-up scheduling.
164  unsigned IssueCount;
165 
166  /// LiveRegDefs - A set of physical registers and their definitions
167  /// that are "live". These nodes must be scheduled before any other nodes that
168  /// modify the registers can be scheduled.
169  unsigned NumLiveRegs;
170  std::unique_ptr<SUnit*[]> LiveRegDefs;
171  std::unique_ptr<SUnit*[]> LiveRegGens;
172 
173  // Collect interferences between physical register use/defs.
174  // Each interference is an SUnit and set of physical registers.
175  SmallVector<SUnit*, 4> Interferences;
176 
177  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
178 
179  LRegsMapT LRegsMap;
180 
181  /// Topo - A topological ordering for SUnits which permits fast IsReachable
182  /// and similar queries.
183  ScheduleDAGTopologicalSort Topo;
184 
185  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
186  // DAG crawling.
187  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
188 
189 public:
190  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
191  SchedulingPriorityQueue *availqueue,
192  CodeGenOpt::Level OptLevel)
193  : ScheduleDAGSDNodes(mf),
194  NeedLatency(needlatency), AvailableQueue(availqueue),
195  Topo(SUnits, nullptr) {
196  const TargetSubtargetInfo &STI = mf.getSubtarget();
197  if (DisableSchedCycles || !NeedLatency)
198  HazardRec = new ScheduleHazardRecognizer();
199  else
200  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
201  }
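  // Editorial note (not part of the original source): when cycle-level
  // precision is disabled or latency information is not needed, the plain
  // ScheduleHazardRecognizer base class is used. Its isEnabled() returns
  // false and it never reports a hazard, so the cycle-accurate checks below
  // become no-ops and scheduling is driven purely by queue priority.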
202 
203  ~ScheduleDAGRRList() override {
204  delete HazardRec;
205  delete AvailableQueue;
206  }
207 
208  void Schedule() override;
209 
210  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
211 
212  /// IsReachable - Checks if SU is reachable from TargetSU.
213  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
214  return Topo.IsReachable(SU, TargetSU);
215  }
216 
217  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
218  /// create a cycle.
219  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
220  return Topo.WillCreateCycle(SU, TargetSU);
221  }
222 
223  /// AddPred - adds a predecessor edge to SUnit SU.
224  /// This returns true if this is a new predecessor.
225  /// Updates the topological ordering if required.
226  void AddPred(SUnit *SU, const SDep &D) {
227  Topo.AddPred(SU, D.getSUnit());
228  SU->addPred(D);
229  }
230 
231  /// RemovePred - removes a predecessor edge from SUnit SU.
232  /// This returns true if an edge was removed.
233  /// Updates the topological ordering if required.
234  void RemovePred(SUnit *SU, const SDep &D) {
235  Topo.RemovePred(SU, D.getSUnit());
236  SU->removePred(D);
237  }
238 
239 private:
240  bool isReady(SUnit *SU) {
241  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
242  AvailableQueue->isReady(SU);
243  }
244 
245  void ReleasePred(SUnit *SU, const SDep *PredEdge);
246  void ReleasePredecessors(SUnit *SU);
247  void ReleasePending();
248  void AdvanceToCycle(unsigned NextCycle);
249  void AdvancePastStalls(SUnit *SU);
250  void EmitNode(SUnit *SU);
251  void ScheduleNodeBottomUp(SUnit*);
252  void CapturePred(SDep *PredEdge);
253  void UnscheduleNodeBottomUp(SUnit*);
254  void RestoreHazardCheckerBottomUp();
255  void BacktrackBottomUp(SUnit*, SUnit*);
256  SUnit *TryUnfoldSU(SUnit *);
257  SUnit *CopyAndMoveSuccessors(SUnit*);
258  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
259  const TargetRegisterClass*,
260  const TargetRegisterClass*,
261  SmallVectorImpl<SUnit*>&);
262  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
263 
264  void releaseInterferences(unsigned Reg = 0);
265 
266  SUnit *PickNodeToScheduleBottomUp();
267  void ListScheduleBottomUp();
268 
269  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
270  /// Updates the topological ordering if required.
271  SUnit *CreateNewSUnit(SDNode *N) {
272  unsigned NumSUnits = SUnits.size();
273  SUnit *NewNode = newSUnit(N);
274  // Update the topological ordering.
275  if (NewNode->NodeNum >= NumSUnits)
276  Topo.InitDAGTopologicalSorting();
277  return NewNode;
278  }
279 
280  /// CreateClone - Creates a new SUnit from an existing one.
281  /// Updates the topological ordering if required.
282  SUnit *CreateClone(SUnit *N) {
283  unsigned NumSUnits = SUnits.size();
284  SUnit *NewNode = Clone(N);
285  // Update the topological ordering.
286  if (NewNode->NodeNum >= NumSUnits)
287  Topo.InitDAGTopologicalSorting();
288  return NewNode;
289  }
290 
291  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
292  /// need actual latency information but the hybrid scheduler does.
293  bool forceUnitLatencies() const override {
294  return !NeedLatency;
295  }
296 };
297 
298 } // end anonymous namespace
299 
300 /// GetCostForDef - Looks up the register class and cost for a given definition.
301 /// Typically this just means looking up the representative register class,
302 /// but for untyped values (MVT::Untyped) it means inspecting the node's
303 /// opcode to determine what register class is being generated.
304 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
305  const TargetLowering *TLI,
306  const TargetInstrInfo *TII,
307  const TargetRegisterInfo *TRI,
308  unsigned &RegClass, unsigned &Cost,
309  const MachineFunction &MF) {
310  MVT VT = RegDefPos.GetValue();
311 
312  // Special handling for untyped values. These values can only come from
313  // the expansion of custom DAG-to-DAG patterns.
314  if (VT == MVT::Untyped) {
315  const SDNode *Node = RegDefPos.GetNode();
316 
317  // Special handling for CopyFromReg of untyped values.
318  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
319  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
320  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
321  RegClass = RC->getID();
322  Cost = 1;
323  return;
324  }
325 
326  unsigned Opcode = Node->getMachineOpcode();
327  if (Opcode == TargetOpcode::REG_SEQUENCE) {
328  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
329  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
330  RegClass = RC->getID();
331  Cost = 1;
332  return;
333  }
334 
335  unsigned Idx = RegDefPos.GetIdx();
336  const MCInstrDesc Desc = TII->get(Opcode);
337  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
338  RegClass = RC->getID();
339  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
340  // better way to determine it.
341  Cost = 1;
342  } else {
343  RegClass = TLI->getRepRegClassFor(VT)->getID();
344  Cost = TLI->getRepRegClassCostFor(VT);
345  }
346 }
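// Illustrative note (assumption, not from the original source): a plain typed
// def, e.g. an i32 value on a typical target, takes the else-branch above, so
// RegClass is the representative class reported by TargetLowering (such as a
// GPR class) and Cost is that class's register-pressure cost. Only
// MVT::Untyped defs require the opcode inspection performed earlier.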
347 
348 /// Schedule - Schedule the DAG using list scheduling.
349 void ScheduleDAGRRList::Schedule() {
350  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
351  << " '" << BB->getName() << "' **********\n");
352 
353  CurCycle = 0;
354  IssueCount = 0;
355  MinAvailableCycle =
356  DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
357  NumLiveRegs = 0;
358  // Allocate slots for each physical register, plus one for a special register
359  // to track the virtual resource of a calling sequence.
360  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
361  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
362  CallSeqEndForStart.clear();
363  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
364 
365  // Build the scheduling graph.
366  BuildSchedGraph(nullptr);
367 
368  LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
369  Topo.InitDAGTopologicalSorting();
370 
371  AvailableQueue->initNodes(SUnits);
372 
373  HazardRec->Reset();
374 
375  // Execute the actual scheduling loop.
376  ListScheduleBottomUp();
377 
378  AvailableQueue->releaseState();
379 
380  LLVM_DEBUG({
381  dbgs() << "*** Final schedule ***\n";
382  dumpSchedule();
383  dbgs() << '\n';
384  });
385 }
386 
387 //===----------------------------------------------------------------------===//
388 // Bottom-Up Scheduling
389 //===----------------------------------------------------------------------===//
390 
391 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
392 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
393 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
394  SUnit *PredSU = PredEdge->getSUnit();
395 
396 #ifndef NDEBUG
397  if (PredSU->NumSuccsLeft == 0) {
398  dbgs() << "*** Scheduling failed! ***\n";
399  PredSU->dump(this);
400  dbgs() << " has been released too many times!\n";
401  llvm_unreachable(nullptr);
402  }
403 #endif
404  --PredSU->NumSuccsLeft;
405 
406  if (!forceUnitLatencies()) {
407  // Updating predecessor's height. This is now the cycle when the
408  // predecessor can be scheduled without causing a pipeline stall.
409  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
410  }
411 
412  // If all the node's successors are scheduled, this node is ready
413  // to be scheduled. Ignore the special EntrySU node.
414  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
415  PredSU->isAvailable = true;
416 
417  unsigned Height = PredSU->getHeight();
418  if (Height < MinAvailableCycle)
419  MinAvailableCycle = Height;
420 
421  if (isReady(PredSU)) {
422  AvailableQueue->push(PredSU);
423  }
424  // CapturePred and others may have left the node in the pending queue, avoid
425  // adding it twice.
426  else if (!PredSU->isPending) {
427  PredSU->isPending = true;
428  PendingQueue.push_back(PredSU);
429  }
430  }
431 }
432 
433 /// IsChainDependent - Test if Outer is reachable from Inner through
434 /// chain dependencies.
435 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
436  unsigned NestLevel,
437  const TargetInstrInfo *TII) {
438  SDNode *N = Outer;
439  while (true) {
440  if (N == Inner)
441  return true;
442  // For a TokenFactor, examine each operand. There may be multiple ways
443  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
444  // most nesting in order to ensure that we find the corresponding match.
445  if (N->getOpcode() == ISD::TokenFactor) {
446  for (const SDValue &Op : N->op_values())
447  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
448  return true;
449  return false;
450  }
451  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
452  if (N->isMachineOpcode()) {
453  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
454  ++NestLevel;
455  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
456  if (NestLevel == 0)
457  return false;
458  --NestLevel;
459  }
460  }
461  // Otherwise, find the chain and continue climbing.
462  for (const SDValue &Op : N->op_values())
463  if (Op.getValueType() == MVT::Other) {
464  N = Op.getNode();
465  goto found_chain_operand;
466  }
467  return false;
468  found_chain_operand:;
469  if (N->getOpcode() == ISD::EntryToken)
470  return false;
471  }
472 }
473 
474 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
475 /// the corresponding (lowered) CALLSEQ_BEGIN node.
476 ///
477 /// NestLevel and MaxNest are used in recursion to indicate the current level
478 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
479 /// level seen so far.
480 ///
481 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
482 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
483 static SDNode *
484 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
485  const TargetInstrInfo *TII) {
486  while (true) {
487  // For a TokenFactor, examine each operand. There may be multiple ways
488  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
489  // most nesting in order to ensure that we find the corresponding match.
490  if (N->getOpcode() == ISD::TokenFactor) {
491  SDNode *Best = nullptr;
492  unsigned BestMaxNest = MaxNest;
493  for (const SDValue &Op : N->op_values()) {
494  unsigned MyNestLevel = NestLevel;
495  unsigned MyMaxNest = MaxNest;
496  if (SDNode *New = FindCallSeqStart(Op.getNode(),
497  MyNestLevel, MyMaxNest, TII))
498  if (!Best || (MyMaxNest > BestMaxNest)) {
499  Best = New;
500  BestMaxNest = MyMaxNest;
501  }
502  }
503  assert(Best);
504  MaxNest = BestMaxNest;
505  return Best;
506  }
507  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
508  if (N->isMachineOpcode()) {
509  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
510  ++NestLevel;
511  MaxNest = std::max(MaxNest, NestLevel);
512  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
513  assert(NestLevel != 0);
514  --NestLevel;
515  if (NestLevel == 0)
516  return N;
517  }
518  }
519  // Otherwise, find the chain and continue climbing.
520  for (const SDValue &Op : N->op_values())
521  if (Op.getValueType() == MVT::Other) {
522  N = Op.getNode();
523  goto found_chain_operand;
524  }
525  return nullptr;
526  found_chain_operand:;
527  if (N->getOpcode() == ISD::EntryToken)
528  return nullptr;
529  }
530 }
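// Illustrative sketch (not part of the original source): climbing backwards
// along the chain from a lowered CALLSEQ_END, nested call sequences pair up as
//
//   CALLSEQ_BEGIN  (outer)   <- returned once NestLevel drops back to 0
//     CALLSEQ_BEGIN  (inner)
//     CALLSEQ_END    (inner)
//   CALLSEQ_END    (outer)   <- starting node
//
// which is why encountering a CALLSEQ_END increments NestLevel and a
// CALLSEQ_BEGIN decrements it while walking up the chain.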
531 
532 /// Call ReleasePred for each predecessor, then update register live def/gen.
533 /// Always update LiveRegDefs for a register dependence even if the current SU
534 /// also defines the register. This effectively creates one large live range
535 /// across a sequence of two-address nodes. This is important because the
536 /// entire chain must be scheduled together. Example:
537 ///
538 /// flags = (3) add
539 /// flags = (2) addc flags
540 /// flags = (1) addc flags
541 ///
542 /// results in
543 ///
544 /// LiveRegDefs[flags] = 3
545 /// LiveRegGens[flags] = 1
546 ///
547 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
548 /// interference on flags.
549 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
550  // Bottom up: release predecessors
551  for (SDep &Pred : SU->Preds) {
552  ReleasePred(SU, &Pred);
553  if (Pred.isAssignedRegDep()) {
554  // This is a physical register dependency and it's impossible or
555  // expensive to copy the register. Make sure nothing that can
556  // clobber the register is scheduled between the predecessor and
557  // this node.
558  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
559  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
560  "interference on register dependence");
561  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
562  if (!LiveRegGens[Pred.getReg()]) {
563  ++NumLiveRegs;
564  LiveRegGens[Pred.getReg()] = SU;
565  }
566  }
567  }
568 
569  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
570  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
571  // these nodes, to prevent other calls from being interscheduled with them.
572  unsigned CallResource = TRI->getNumRegs();
573  if (!LiveRegDefs[CallResource])
574  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
575  if (Node->isMachineOpcode() &&
576  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
577  unsigned NestLevel = 0;
578  unsigned MaxNest = 0;
579  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
580  assert(N && "Must find call sequence start");
581 
582  SUnit *Def = &SUnits[N->getNodeId()];
583  CallSeqEndForStart[Def] = SU;
584 
585  ++NumLiveRegs;
586  LiveRegDefs[CallResource] = Def;
587  LiveRegGens[CallResource] = SU;
588  break;
589  }
590 }
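// Editorial note: CallResource is the extra "virtual register" slot allocated
// in Schedule() at index TRI->getNumRegs(). Treating an in-flight call
// sequence as a live register reuses the existing live-register interference
// machinery to keep other calls from being scheduled inside it.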
591 
592 /// Check to see if any of the pending instructions are ready to issue. If
593 /// so, add them to the available queue.
594 void ScheduleDAGRRList::ReleasePending() {
595  if (DisableSchedCycles) {
596  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
597  return;
598  }
599 
600  // If the available queue is empty, it is safe to reset MinAvailableCycle.
601  if (AvailableQueue->empty())
602  MinAvailableCycle = std::numeric_limits<unsigned>::max();
603 
604  // Check to see if any of the pending instructions are ready to issue. If
605  // so, add them to the available queue.
606  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
607  unsigned ReadyCycle = PendingQueue[i]->getHeight();
608  if (ReadyCycle < MinAvailableCycle)
609  MinAvailableCycle = ReadyCycle;
610 
611  if (PendingQueue[i]->isAvailable) {
612  if (!isReady(PendingQueue[i]))
613  continue;
614  AvailableQueue->push(PendingQueue[i]);
615  }
616  PendingQueue[i]->isPending = false;
617  PendingQueue[i] = PendingQueue.back();
618  PendingQueue.pop_back();
619  --i; --e;
620  }
621 }
622 
623 /// Move the scheduler state forward by the specified number of Cycles.
624 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
625  if (NextCycle <= CurCycle)
626  return;
627 
628  IssueCount = 0;
629  AvailableQueue->setCurCycle(NextCycle);
630  if (!HazardRec->isEnabled()) {
631  // Bypass lots of virtual calls in case of long latency.
632  CurCycle = NextCycle;
633  }
634  else {
635  for (; CurCycle != NextCycle; ++CurCycle) {
636  HazardRec->RecedeCycle();
637  }
638  }
639  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
640  // available Q to release pending nodes at least once before popping.
641  ReleasePending();
642 }
643 
644 /// Move the scheduler state forward until the specified node's dependents are
645 /// ready and can be scheduled with no resource conflicts.
646 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
647  if (DisableSchedCycles)
648  return;
649 
650  // FIXME: Nodes such as CopyFromReg probably should not advance the current
651  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
652  // has predecessors the cycle will be advanced when they are scheduled.
653  // But given the crude nature of modeling latency though such nodes, we
654  // currently need to treat these nodes like real instructions.
655  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
656 
657  unsigned ReadyCycle = SU->getHeight();
658 
659  // Bump CurCycle to account for latency. We assume the latency of other
660  // available instructions may be hidden by the stall (not a full pipe stall).
661  // This updates the hazard recognizer's cycle before reserving resources for
662  // this instruction.
663  AdvanceToCycle(ReadyCycle);
664 
665  // Calls are scheduled in their preceding cycle, so don't conflict with
666  // hazards from instructions after the call. EmitNode will reset the
667  // scoreboard state before emitting the call.
668  if (SU->isCall)
669  return;
670 
671  // FIXME: For resource conflicts in very long non-pipelined stages, we
672  // should probably skip ahead here to avoid useless scoreboard checks.
673  int Stalls = 0;
674  while (true) {
675  ScheduleHazardRecognizer::HazardType HT =
676  HazardRec->getHazardType(SU, -Stalls);
677 
678  if (HT == ScheduleHazardRecognizer::NoHazard)
679  break;
680 
681  ++Stalls;
682  }
683  AdvanceToCycle(CurCycle + Stalls);
684 }
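// Editorial note: Stalls counts how many additional cycles the hazard
// recognizer demands before SU can issue; advancing to CurCycle + Stalls lets
// other available instructions hide part of that latency rather than modeling
// a full pipeline stall.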
685 
686 /// Record this SUnit in the HazardRecognizer.
687 /// Does not update CurCycle.
688 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
689  if (!HazardRec->isEnabled())
690  return;
691 
692  // Check for phys reg copy.
693  if (!SU->getNode())
694  return;
695 
696  switch (SU->getNode()->getOpcode()) {
697  default:
698  assert(SU->getNode()->isMachineOpcode() &&
699  "This target-independent node should not be scheduled.");
700  break;
701  case ISD::MERGE_VALUES:
702  case ISD::TokenFactor:
703  case ISD::LIFETIME_START:
704  case ISD::LIFETIME_END:
705  case ISD::CopyToReg:
706  case ISD::CopyFromReg:
707  case ISD::EH_LABEL:
708  // Noops don't affect the scoreboard state. Copies are likely to be
709  // removed.
710  return;
711  case ISD::INLINEASM:
712  // For inline asm, clear the pipeline state.
713  HazardRec->Reset();
714  return;
715  }
716  if (SU->isCall) {
717  // Calls are scheduled with their preceding instructions. For bottom-up
718  // scheduling, clear the pipeline state before emitting.
719  HazardRec->Reset();
720  }
721 
722  HazardRec->EmitInstruction(SU);
723 }
724 
725 static void resetVRegCycle(SUnit *SU);
726 
727 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
728 /// count of its predecessors. If a predecessor pending count is zero, add it to
729 /// the Available queue.
730 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
731  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
732  LLVM_DEBUG(SU->dump(this));
733 
734 #ifndef NDEBUG
735  if (CurCycle < SU->getHeight())
736  LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
737  << "] pipeline stall!\n");
738 #endif
739 
740  // FIXME: Do not modify node height. It may interfere with
741  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
742  // node its ready cycle can aid heuristics, and after scheduling it can
743  // indicate the scheduled cycle.
744  SU->setHeightToAtLeast(CurCycle);
745 
746  // Reserve resources for the scheduled instruction.
747  EmitNode(SU);
748 
749  Sequence.push_back(SU);
750 
751  AvailableQueue->scheduledNode(SU);
752 
753  // If HazardRec is disabled, and each inst counts as one cycle, then
754  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
755  // PendingQueue for schedulers that implement HasReadyFilter.
756  if (!HazardRec->isEnabled() && AvgIPC < 2)
757  AdvanceToCycle(CurCycle + 1);
758 
759  // Update liveness of predecessors before successors to avoid treating a
760  // two-address node as a live range def.
761  ReleasePredecessors(SU);
762 
763  // Release all the implicit physical register defs that are live.
764  for (SDep &Succ : SU->Succs) {
765  // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
766  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
767  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
768  --NumLiveRegs;
769  LiveRegDefs[Succ.getReg()] = nullptr;
770  LiveRegGens[Succ.getReg()] = nullptr;
771  releaseInterferences(Succ.getReg());
772  }
773  }
774  // Release the special call resource dependence, if this is the beginning
775  // of a call.
776  unsigned CallResource = TRI->getNumRegs();
777  if (LiveRegDefs[CallResource] == SU)
778  for (const SDNode *SUNode = SU->getNode(); SUNode;
779  SUNode = SUNode->getGluedNode()) {
780  if (SUNode->isMachineOpcode() &&
781  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
782  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
783  --NumLiveRegs;
784  LiveRegDefs[CallResource] = nullptr;
785  LiveRegGens[CallResource] = nullptr;
786  releaseInterferences(CallResource);
787  }
788  }
789 
790  resetVRegCycle(SU);
791 
792  SU->isScheduled = true;
793 
794  // Conditions under which the scheduler should eagerly advance the cycle:
795  // (1) No available instructions
796  // (2) All pipelines full, so available instructions must have hazards.
797  //
798  // If HazardRec is disabled, the cycle was pre-advanced before calling
799  // ReleasePredecessors. In that case, IssueCount should remain 0.
800  //
801  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
802  if (HazardRec->isEnabled() || AvgIPC > 1) {
803  if (SU->getNode() && SU->getNode()->isMachineOpcode())
804  ++IssueCount;
805  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
806  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
807  AdvanceToCycle(CurCycle + 1);
808  }
809 }
810 
811 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
812 /// unscheduled, increase the succ left count of its predecessors. Remove
813 /// them from AvailableQueue if necessary.
814 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
815  SUnit *PredSU = PredEdge->getSUnit();
816  if (PredSU->isAvailable) {
817  PredSU->isAvailable = false;
818  if (!PredSU->isPending)
819  AvailableQueue->remove(PredSU);
820  }
821 
822  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
823  "NumSuccsLeft will overflow!");
824  ++PredSU->NumSuccsLeft;
825 }
826 
827 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
828 /// its predecessor states to reflect the change.
829 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
830  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
831  LLVM_DEBUG(SU->dump(this));
832 
833  for (SDep &Pred : SU->Preds) {
834  CapturePred(&Pred);
835  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
836  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
837  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
838  "Physical register dependency violated?");
839  --NumLiveRegs;
840  LiveRegDefs[Pred.getReg()] = nullptr;
841  LiveRegGens[Pred.getReg()] = nullptr;
842  releaseInterferences(Pred.getReg());
843  }
844  }
845 
846  // Reclaim the special call resource dependence, if this is the beginning
847  // of a call.
848  unsigned CallResource = TRI->getNumRegs();
849  for (const SDNode *SUNode = SU->getNode(); SUNode;
850  SUNode = SUNode->getGluedNode()) {
851  if (SUNode->isMachineOpcode() &&
852  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
853  SUnit *SeqEnd = CallSeqEndForStart[SU];
854  assert(SeqEnd && "Call sequence start/end must be known");
855  assert(!LiveRegDefs[CallResource]);
856  assert(!LiveRegGens[CallResource]);
857  ++NumLiveRegs;
858  LiveRegDefs[CallResource] = SU;
859  LiveRegGens[CallResource] = SeqEnd;
860  }
861  }
862 
863  // Release the special call resource dependence, if this is the end
864  // of a call.
865  if (LiveRegGens[CallResource] == SU)
866  for (const SDNode *SUNode = SU->getNode(); SUNode;
867  SUNode = SUNode->getGluedNode()) {
868  if (SUNode->isMachineOpcode() &&
869  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
870  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
871  assert(LiveRegDefs[CallResource]);
872  assert(LiveRegGens[CallResource]);
873  --NumLiveRegs;
874  LiveRegDefs[CallResource] = nullptr;
875  LiveRegGens[CallResource] = nullptr;
876  releaseInterferences(CallResource);
877  }
878  }
879 
880  for (auto &Succ : SU->Succs) {
881  if (Succ.isAssignedRegDep()) {
882  auto Reg = Succ.getReg();
883  if (!LiveRegDefs[Reg])
884  ++NumLiveRegs;
885  // This becomes the nearest def. Note that an earlier def may still be
886  // pending if this is a two-address node.
887  LiveRegDefs[Reg] = SU;
888 
889  // Update LiveRegGens[Reg] only if it was empty before this unscheduling.
890  // This avoids incorrectly overwriting an entry that was set in a previous run.
891  if (!LiveRegGens[Reg]) {
892  // Find the successor with the lowest height.
893  LiveRegGens[Reg] = Succ.getSUnit();
894  for (auto &Succ2 : SU->Succs) {
895  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
896  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
897  LiveRegGens[Reg] = Succ2.getSUnit();
898  }
899  }
900  }
901  }
902  if (SU->getHeight() < MinAvailableCycle)
903  MinAvailableCycle = SU->getHeight();
904 
905  SU->setHeightDirty();
906  SU->isScheduled = false;
907  SU->isAvailable = true;
908  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
909  // Don't make available until backtracking is complete.
910  SU->isPending = true;
911  PendingQueue.push_back(SU);
912  }
913  else {
914  AvailableQueue->push(SU);
915  }
916  AvailableQueue->unscheduledNode(SU);
917 }
918 
919 /// After backtracking, the hazard checker needs to be restored to a state
920 /// corresponding to the current cycle.
921 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
922  HazardRec->Reset();
923 
924  unsigned LookAhead = std::min((unsigned)Sequence.size(),
925  HazardRec->getMaxLookAhead());
926  if (LookAhead == 0)
927  return;
928 
929  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
930  unsigned HazardCycle = (*I)->getHeight();
931  for (auto E = Sequence.end(); I != E; ++I) {
932  SUnit *SU = *I;
933  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
934  HazardRec->RecedeCycle();
935  }
936  EmitNode(SU);
937  }
938 }
939 
940 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
941 /// BTCycle in order to schedule a specific node.
942 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
943  SUnit *OldSU = Sequence.back();
944  while (true) {
945  Sequence.pop_back();
946  // FIXME: use ready cycle instead of height
947  CurCycle = OldSU->getHeight();
948  UnscheduleNodeBottomUp(OldSU);
949  AvailableQueue->setCurCycle(CurCycle);
950  if (OldSU == BtSU)
951  break;
952  OldSU = Sequence.back();
953  }
954 
955  assert(!SU->isSucc(OldSU) && "Something is wrong!");
956 
957  RestoreHazardCheckerBottomUp();
958 
959  ReleasePending();
960 
961  ++NumBacktracks;
962 }
963 
964 static bool isOperandOf(const SUnit *SU, SDNode *N) {
965  for (const SDNode *SUNode = SU->getNode(); SUNode;
966  SUNode = SUNode->getGluedNode()) {
967  if (SUNode->isOperandOf(N))
968  return true;
969  }
970  return false;
971 }
972 
973 /// TryUnfoldSU - Attempt to unfold the given node. Returns the SUnit to
974 /// schedule on success, or nullptr if the node cannot be unfolded.
974 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
975  SDNode *N = SU->getNode();
976  // Try to split a load-folded node back into a load and an operation.
977  SmallVector<SDNode *, 2> NewNodes;
978  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
979  return nullptr;
980 
981  // unfolding an x86 DEC64m operation results in store, dec, load which
982  // can't be handled here so quit
983  if (NewNodes.size() == 3)
984  return nullptr;
985 
986  assert(NewNodes.size() == 2 && "Expected a load folding node!");
987 
988  N = NewNodes[1];
989  SDNode *LoadNode = NewNodes[0];
990  unsigned NumVals = N->getNumValues();
991  unsigned OldNumVals = SU->getNode()->getNumValues();
992 
993  // LoadNode may already exist. This can happen when there is another
994  // load from the same location and producing the same type of value
995  // but it has different alignment or volatility.
996  bool isNewLoad = true;
997  SUnit *LoadSU;
998  if (LoadNode->getNodeId() != -1) {
999  LoadSU = &SUnits[LoadNode->getNodeId()];
1000  // If LoadSU has already been scheduled, we should clone it but
1001  // this would negate the benefit to unfolding so just return SU.
1002  if (LoadSU->isScheduled)
1003  return SU;
1004  isNewLoad = false;
1005  } else {
1006  LoadSU = CreateNewSUnit(LoadNode);
1007  LoadNode->setNodeId(LoadSU->NodeNum);
1008 
1009  InitNumRegDefsLeft(LoadSU);
1010  computeLatency(LoadSU);
1011  }
1012 
1013  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1014 
1015  // Now that we are committed to unfolding replace DAG Uses.
1016  for (unsigned i = 0; i != NumVals; ++i)
1017  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1018  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1019  SDValue(LoadNode, 1));
1020 
1021  SUnit *NewSU = CreateNewSUnit(N);
1022  assert(N->getNodeId() == -1 && "Node already inserted!");
1023  N->setNodeId(NewSU->NodeNum);
1024 
1025  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1026  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1027  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1028  NewSU->isTwoAddress = true;
1029  break;
1030  }
1031  }
1032  if (MCID.isCommutable())
1033  NewSU->isCommutable = true;
1034 
1035  InitNumRegDefsLeft(NewSU);
1036  computeLatency(NewSU);
1037 
1038  // Record all the edges to and from the old SU, by category.
1039  SmallVector<SDep, 4> ChainPreds;
1040  SmallVector<SDep, 4> ChainSuccs;
1041  SmallVector<SDep, 4> LoadPreds;
1042  SmallVector<SDep, 4> NodePreds;
1043  SmallVector<SDep, 4> NodeSuccs;
1044  for (SDep &Pred : SU->Preds) {
1045  if (Pred.isCtrl())
1046  ChainPreds.push_back(Pred);
1047  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1048  LoadPreds.push_back(Pred);
1049  else
1050  NodePreds.push_back(Pred);
1051  }
1052  for (SDep &Succ : SU->Succs) {
1053  if (Succ.isCtrl())
1054  ChainSuccs.push_back(Succ);
1055  else
1056  NodeSuccs.push_back(Succ);
1057  }
1058 
1059  // Now assign edges to the newly-created nodes.
1060  for (const SDep &Pred : ChainPreds) {
1061  RemovePred(SU, Pred);
1062  if (isNewLoad)
1063  AddPred(LoadSU, Pred);
1064  }
1065  for (const SDep &Pred : LoadPreds) {
1066  RemovePred(SU, Pred);
1067  if (isNewLoad)
1068  AddPred(LoadSU, Pred);
1069  }
1070  for (const SDep &Pred : NodePreds) {
1071  RemovePred(SU, Pred);
1072  AddPred(NewSU, Pred);
1073  }
1074  for (SDep D : NodeSuccs) {
1075  SUnit *SuccDep = D.getSUnit();
1076  D.setSUnit(SU);
1077  RemovePred(SuccDep, D);
1078  D.setSUnit(NewSU);
1079  AddPred(SuccDep, D);
1080  // Balance register pressure.
1081  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1082  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1083  --NewSU->NumRegDefsLeft;
1084  }
1085  for (SDep D : ChainSuccs) {
1086  SUnit *SuccDep = D.getSUnit();
1087  D.setSUnit(SU);
1088  RemovePred(SuccDep, D);
1089  if (isNewLoad) {
1090  D.setSUnit(LoadSU);
1091  AddPred(SuccDep, D);
1092  }
1093  }
1094 
1095  // Add a data dependency to reflect that NewSU reads the value defined
1096  // by LoadSU.
1097  SDep D(LoadSU, SDep::Data, 0);
1098  D.setLatency(LoadSU->Latency);
1099  AddPred(NewSU, D);
1100 
1101  if (isNewLoad)
1102  AvailableQueue->addNode(LoadSU);
1103  AvailableQueue->addNode(NewSU);
1104 
1105  ++NumUnfolds;
1106 
1107  if (NewSU->NumSuccsLeft == 0)
1108  NewSU->isAvailable = true;
1109 
1110  return NewSU;
1111 }
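// Illustrative example (assumption, not from the original source): on x86 a
// load-folded instruction such as ADD32rm may be unfolded by
// unfoldMemoryOperand() into a MOV32rm load plus an ADD32rr, giving the
// scheduler freedom to place the load away from the arithmetic; DEC64m-style
// cases that would unfold into load + op + store are rejected above.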
1112 
1113 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1114 /// successors to the newly created node.
1115 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1116  SDNode *N = SU->getNode();
1117  if (!N)
1118  return nullptr;
1119 
1120  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1121  LLVM_DEBUG(SU->dump(this));
1122 
1123  if (N->getGluedNode() &&
1124  !TII->canCopyGluedNodeDuringSchedule(N)) {
1125  LLVM_DEBUG(
1126  dbgs()
1127  << "Giving up because it has incoming glue and the target does not "
1128  "want to copy it\n");
1129  return nullptr;
1130  }
1131 
1132  SUnit *NewSU;
1133  bool TryUnfold = false;
1134  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1135  MVT VT = N->getSimpleValueType(i);
1136  if (VT == MVT::Glue) {
1137  LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1138  return nullptr;
1139  } else if (VT == MVT::Other)
1140  TryUnfold = true;
1141  }
1142  for (const SDValue &Op : N->op_values()) {
1143  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1144  if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1145  LLVM_DEBUG(
1146  dbgs() << "Giving up because one of the operands is glue and "
1147  "the target does not want to copy it\n");
1148  return nullptr;
1149  }
1150  }
1151 
1152  // If possible unfold instruction.
1153  if (TryUnfold) {
1154  SUnit *UnfoldSU = TryUnfoldSU(SU);
1155  if (!UnfoldSU)
1156  return nullptr;
1157  SU = UnfoldSU;
1158  N = SU->getNode();
1159  // If this can be scheduled don't bother duplicating and just return
1160  if (SU->NumSuccsLeft == 0)
1161  return SU;
1162  }
1163 
1164  LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1165  NewSU = CreateClone(SU);
1166 
1167  // New SUnit has the exact same predecessors.
1168  for (SDep &Pred : SU->Preds)
1169  if (!Pred.isArtificial())
1170  AddPred(NewSU, Pred);
1171 
1172  // Only copy scheduled successors. Cut them from old node's successor
1173  // list and move them over.
1174  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1175  for (SDep &Succ : SU->Succs) {
1176  if (Succ.isArtificial())
1177  continue;
1178  SUnit *SuccSU = Succ.getSUnit();
1179  if (SuccSU->isScheduled) {
1180  SDep D = Succ;
1181  D.setSUnit(NewSU);
1182  AddPred(SuccSU, D);
1183  D.setSUnit(SU);
1184  DelDeps.push_back(std::make_pair(SuccSU, D));
1185  }
1186  }
1187  for (auto &DelDep : DelDeps)
1188  RemovePred(DelDep.first, DelDep.second);
1189 
1190  AvailableQueue->updateNode(SU);
1191  AvailableQueue->addNode(NewSU);
1192 
1193  ++NumDups;
1194  return NewSU;
1195 }
1196 
1197 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1198 /// scheduled successors of the given SUnit to the last copy.
1199 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1200  const TargetRegisterClass *DestRC,
1201  const TargetRegisterClass *SrcRC,
1202  SmallVectorImpl<SUnit*> &Copies) {
1203  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1204  CopyFromSU->CopySrcRC = SrcRC;
1205  CopyFromSU->CopyDstRC = DestRC;
1206 
1207  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1208  CopyToSU->CopySrcRC = DestRC;
1209  CopyToSU->CopyDstRC = SrcRC;
1210 
1211  // Only copy scheduled successors. Cut them from old node's successor
1212  // list and move them over.
1213  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1214  for (SDep &Succ : SU->Succs) {
1215  if (Succ.isArtificial())
1216  continue;
1217  SUnit *SuccSU = Succ.getSUnit();
1218  if (SuccSU->isScheduled) {
1219  SDep D = Succ;
1220  D.setSUnit(CopyToSU);
1221  AddPred(SuccSU, D);
1222  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1223  }
1224  else {
1225  // Avoid scheduling the def-side copy before other successors. Otherwise
1226  // we could introduce another physreg interference on the copy and
1227  // continue inserting copies indefinitely.
1228  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1229  }
1230  }
1231  for (auto &DelDep : DelDeps)
1232  RemovePred(DelDep.first, DelDep.second);
1233 
1234  SDep FromDep(SU, SDep::Data, Reg);
1235  FromDep.setLatency(SU->Latency);
1236  AddPred(CopyFromSU, FromDep);
1237  SDep ToDep(CopyFromSU, SDep::Data, 0);
1238  ToDep.setLatency(CopyFromSU->Latency);
1239  AddPred(CopyToSU, ToDep);
1240 
1241  AvailableQueue->updateNode(SU);
1242  AvailableQueue->addNode(CopyFromSU);
1243  AvailableQueue->addNode(CopyToSU);
1244  Copies.push_back(CopyFromSU);
1245  Copies.push_back(CopyToSU);
1246 
1247  ++NumPRCopies;
1248 }
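// Editorial note: after this runs, the value flows SU -> CopyFromSU (into
// DestRC) -> CopyToSU (back into SrcRC) -> the already-scheduled successors,
// so the physical register defined by SU is freed by the first copy instead
// of staying live across those successors.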
1249 
1250 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1251 /// definition of the specified node.
1252 /// FIXME: Move to SelectionDAG?
1253 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1254  const TargetInstrInfo *TII) {
1255  unsigned NumRes;
1256  if (N->getOpcode() == ISD::CopyFromReg) {
1257  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1258  NumRes = 1;
1259  } else {
1260  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1261  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1262  NumRes = MCID.getNumDefs();
1263  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1264  if (Reg == *ImpDef)
1265  break;
1266  ++NumRes;
1267  }
1268  }
1269  return N->getSimpleValueType(NumRes);
1270 }
1271 
1272 /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
1273 /// specified register def of the specified SUnit clobbers any "live" registers.
1274 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1275  SUnit **LiveRegDefs,
1276  SmallSet<unsigned, 4> &RegAdded,
1277  SmallVectorImpl<unsigned> &LRegs,
1278  const TargetRegisterInfo *TRI) {
1279  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1280 
1281  // Check if Reg is live.
1282  if (!LiveRegDefs[*AliasI]) continue;
1283 
1284  // Allow multiple uses of the same def.
1285  if (LiveRegDefs[*AliasI] == SU) continue;
1286 
1287  // Add Reg to the set of interfering live regs.
1288  if (RegAdded.insert(*AliasI).second) {
1289  LRegs.push_back(*AliasI);
1290  }
1291  }
1292 }
1293 
1294 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1295 /// by RegMask, and add them to LRegs.
1296 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1297  ArrayRef<SUnit*> LiveRegDefs,
1298  SmallSet<unsigned, 4> &RegAdded,
1299  SmallVectorImpl<unsigned> &LRegs) {
1300  // Look at all live registers. Skip Reg0 and the special CallResource.
1301  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1302  if (!LiveRegDefs[i]) continue;
1303  if (LiveRegDefs[i] == SU) continue;
1304  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1305  if (RegAdded.insert(i).second)
1306  LRegs.push_back(i);
1307  }
1308 }
1309 
1310 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1311 static const uint32_t *getNodeRegMask(const SDNode *N) {
1312  for (const SDValue &Op : N->op_values())
1313  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1314  return RegOp->getRegMask();
1315  return nullptr;
1316 }
1317 
1318 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1319 /// scheduling of the given node to satisfy live physical register dependencies.
1320 /// If the specific node is the last one that's available to schedule, do
1321 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1322 bool ScheduleDAGRRList::
1323 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1324  if (NumLiveRegs == 0)
1325  return false;
1326 
1327  SmallSet<unsigned, 4> RegAdded;
1328  // If this node would clobber any "live" register, then it's not ready.
1329  //
1330  // If SU is the currently live definition of the same register that it uses,
1331  // then we are free to schedule it.
1332  for (SDep &Pred : SU->Preds) {
1333  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1334  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1335  RegAdded, LRegs, TRI);
1336  }
1337 
1338  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1339  if (Node->getOpcode() == ISD::INLINEASM) {
1340  // Inline asm can clobber physical defs.
1341  unsigned NumOps = Node->getNumOperands();
1342  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1343  --NumOps; // Ignore the glue operand.
1344 
1345  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1346  unsigned Flags =
1347  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1348  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1349 
1350  ++i; // Skip the ID value.
1351  if (InlineAsm::isRegDefKind(Flags) ||
1352  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1353  InlineAsm::isClobberKind(Flags)) {
1354  // Check for def of register or earlyclobber register.
1355  for (; NumVals; --NumVals, ++i) {
1356  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1357  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1358  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1359  }
1360  } else
1361  i += NumVals;
1362  }
1363  continue;
1364  }
1365 
1366  if (!Node->isMachineOpcode())
1367  continue;
1368  // If we're in the middle of scheduling a call, don't begin scheduling
1369  // another call. Also, don't allow any physical registers to be live across
1370  // the call.
1371  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1372  // Check the special calling-sequence resource.
1373  unsigned CallResource = TRI->getNumRegs();
1374  if (LiveRegDefs[CallResource]) {
1375  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1376  while (SDNode *Glued = Gen->getGluedNode())
1377  Gen = Glued;
1378  if (!IsChainDependent(Gen, Node, 0, TII) &&
1379  RegAdded.insert(CallResource).second)
1380  LRegs.push_back(CallResource);
1381  }
1382  }
1383  if (const uint32_t *RegMask = getNodeRegMask(Node))
1384  CheckForLiveRegDefMasked(SU, RegMask,
1385  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1386  RegAdded, LRegs);
1387 
1388  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1389  if (MCID.hasOptionalDef()) {
1390  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1391  // This operand can be either a def of CPSR, if the S bit is set; or a use
1392  // of %noreg. When the OptionalDef is set to a valid register, we need to
1393  // handle it in the same way as an ImplicitDef.
1394  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1395  if (MCID.OpInfo[i].isOptionalDef()) {
1396  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1397  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1398  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1399  }
1400  }
1401  if (!MCID.ImplicitDefs)
1402  continue;
1403  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1404  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1405  }
1406 
1407  return !LRegs.empty();
1408 }
1409 
1410 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1411  // Add the nodes that aren't ready back onto the available list.
1412  for (unsigned i = Interferences.size(); i > 0; --i) {
1413  SUnit *SU = Interferences[i-1];
1414  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1415  if (Reg) {
1416  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1417  if (!is_contained(LRegs, Reg))
1418  continue;
1419  }
1420  SU->isPending = false;
1421  // The interfering node may no longer be available due to backtracking.
1422  // Furthermore, it may have been made available again, in which case it is
1423  // now already in the AvailableQueue.
1424  if (SU->isAvailable && !SU->NodeQueueId) {
1425  LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1426  AvailableQueue->push(SU);
1427  }
1428  if (i < Interferences.size())
1429  Interferences[i-1] = Interferences.back();
1430  Interferences.pop_back();
1431  LRegsMap.erase(LRegsPos);
1432  }
1433 }
1434 
1435 /// Return a node that can be scheduled in this cycle. Requirements:
1436 /// (1) Ready: latency has been satisfied
1437 /// (2) No Hazards: resources are available
1438 /// (3) No Interferences: may unschedule to break register interferences.
1439 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1440  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1441  auto FindAvailableNode = [&]() {
1442  while (CurSU) {
1443  SmallVector<unsigned, 4> LRegs;
1444  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1445  break;
1446  LLVM_DEBUG(dbgs() << " Interfering reg ";
1447  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1448  else dbgs() << printReg(LRegs[0], TRI);
1449  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1450  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1451  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1452  if (LRegsPair.second) {
1453  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1454  Interferences.push_back(CurSU);
1455  }
1456  else {
1457  assert(CurSU->isPending && "Interferences are pending");
1458  // Update the interference with current live regs.
1459  LRegsPair.first->second = LRegs;
1460  }
1461  CurSU = AvailableQueue->pop();
1462  }
1463  };
1464  FindAvailableNode();
1465  if (CurSU)
1466  return CurSU;
1467 
1468  // All candidates are delayed due to live physical reg dependencies.
1469  // Try backtracking, code duplication, or inserting cross class copies
1470  // to resolve it.
1471  for (SUnit *TrySU : Interferences) {
1472  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1473 
1474  // Try unscheduling up to the point where it's safe to schedule
1475  // this node.
1476  SUnit *BtSU = nullptr;
1477  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1478  for (unsigned Reg : LRegs) {
1479  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1480  BtSU = LiveRegGens[Reg];
1481  LiveCycle = BtSU->getHeight();
1482  }
1483  }
1484  if (!WillCreateCycle(TrySU, BtSU)) {
1485  // BacktrackBottomUp mutates Interferences!
1486  BacktrackBottomUp(TrySU, BtSU);
1487 
1488  // Force the current node to be scheduled before the node that
1489  // requires the physical reg dep.
1490  if (BtSU->isAvailable) {
1491  BtSU->isAvailable = false;
1492  if (!BtSU->isPending)
1493  AvailableQueue->remove(BtSU);
1494  }
1495  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1496  << ") to SU(" << TrySU->NodeNum << ")\n");
1497  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1498 
1499  // If one or more successors has been unscheduled, then the current
1500  // node is no longer available.
1501  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1502  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1503  CurSU = AvailableQueue->pop();
1504  } else {
1505  LLVM_DEBUG(dbgs() << "TrySU available\n");
1506  // Available and in AvailableQueue
1507  AvailableQueue->remove(TrySU);
1508  CurSU = TrySU;
1509  }
1510  FindAvailableNode();
1511  // Interferences has been mutated. We must break.
1512  break;
1513  }
1514  }
1515 
1516  if (!CurSU) {
1517  // Can't backtrack. If it's too expensive to copy the value, then try
1518  // to duplicate the nodes that produce these "too expensive to copy"
1519  // values to break the dependency. In case even that doesn't work,
1520  // insert cross class copies.
1521  // If it's not too expensive, i.e. cost != -1, issue copies.
1522  SUnit *TrySU = Interferences[0];
1523  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1524  assert(LRegs.size() == 1 && "Can't handle this yet!");
1525  unsigned Reg = LRegs[0];
1526  SUnit *LRDef = LiveRegDefs[Reg];
1527  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1528  const TargetRegisterClass *RC =
1529  TRI->getMinimalPhysRegClass(Reg, VT);
1530  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1531 
1532  // If cross copy register class is the same as RC, then it must be possible
1533  // to copy the value directly. Do not try to duplicate the def.
1534  // If cross copy register class is not the same as RC, then it's possible to
1535  // copy the value but it requires cross register class copies and it is
1536  // expensive.
1537  // If cross copy register class is null, then it's not possible to copy
1538  // the value at all.
1539  SUnit *NewDef = nullptr;
1540  if (DestRC != RC) {
1541  NewDef = CopyAndMoveSuccessors(LRDef);
1542  if (!DestRC && !NewDef)
1543  report_fatal_error("Can't handle live physical register dependency!");
1544  }
1545  if (!NewDef) {
1546  // Issue copies, these can be expensive cross register class copies.
1547  SmallVector<SUnit*, 2> Copies;
1548  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1549  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1550  << " to SU #" << Copies.front()->NodeNum << "\n");
1551  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1552  NewDef = Copies.back();
1553  }
1554 
1555  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1556  << " to SU #" << TrySU->NodeNum << "\n");
1557  LiveRegDefs[Reg] = NewDef;
1558  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1559  TrySU->isAvailable = false;
1560  CurSU = NewDef;
1561  }
1562  assert(CurSU && "Unable to resolve live physical register dependencies!");
1563  return CurSU;
1564 }
1565 
1566 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1567 /// schedulers.
1568 void ScheduleDAGRRList::ListScheduleBottomUp() {
1569  // Release any predecessors of the special Exit node.
1570  ReleasePredecessors(&ExitSU);
1571 
1572  // Add root to Available queue.
1573  if (!SUnits.empty()) {
1574  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1575  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1576  RootSU->isAvailable = true;
1577  AvailableQueue->push(RootSU);
1578  }
1579 
1580  // While Available queue is not empty, grab the node with the highest
1581  // priority. If it is not ready put it back. Schedule the node.
1582  Sequence.reserve(SUnits.size());
1583  while (!AvailableQueue->empty() || !Interferences.empty()) {
1584  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1585  AvailableQueue->dump(this));
1586 
1587  // Pick the best node to schedule taking all constraints into
1588  // consideration.
1589  SUnit *SU = PickNodeToScheduleBottomUp();
1590 
1591  AdvancePastStalls(SU);
1592 
1593  ScheduleNodeBottomUp(SU);
1594 
1595  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1596  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1597  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1598  "MinAvailableCycle uninitialized");
1599  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1600  }
1601  }
1602 
1603  // Reverse the order if it is bottom up.
1604  std::reverse(Sequence.begin(), Sequence.end());
1605 
1606 #ifndef NDEBUG
1607  VerifyScheduledSequence(/*isBottomUp=*/true);
1608 #endif
1609 }
1610 
1611 namespace {
1612 
1613 class RegReductionPQBase;
1614 
1615 struct queue_sort {
1616  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1617 };
1618 
1619 #ifndef NDEBUG
1620 template<class SF>
1621 struct reverse_sort : public queue_sort {
1622  SF &SortFunc;
1623 
1624  reverse_sort(SF &sf) : SortFunc(sf) {}
1625 
1626  bool operator()(SUnit* left, SUnit* right) const {
1627  // reverse left/right rather than simply !SortFunc(left, right)
1628  // to expose different paths in the comparison logic.
1629  return SortFunc(right, left);
1630  }
1631 };
1632 #endif // NDEBUG
1633 
1634 /// bu_ls_rr_sort - Priority function for the bottom-up register pressure
1635 /// reduction scheduler.
1636 struct bu_ls_rr_sort : public queue_sort {
1637  enum {
1638  IsBottomUp = true,
1639  HasReadyFilter = false
1640  };
1641 
1642  RegReductionPQBase *SPQ;
1643 
1644  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1645 
1646  bool operator()(SUnit* left, SUnit* right) const;
1647 };
1648 
1649 // src_ls_rr_sort - Priority function for source order scheduler.
1650 struct src_ls_rr_sort : public queue_sort {
1651  enum {
1652  IsBottomUp = true,
1653  HasReadyFilter = false
1654  };
1655 
1656  RegReductionPQBase *SPQ;
1657 
1658  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1659 
1660  bool operator()(SUnit* left, SUnit* right) const;
1661 };
1662 
1663 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1664 struct hybrid_ls_rr_sort : public queue_sort {
1665  enum {
1666  IsBottomUp = true,
1667  HasReadyFilter = false
1668  };
1669 
1670  RegReductionPQBase *SPQ;
1671 
1672  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1673 
1674  bool isReady(SUnit *SU, unsigned CurCycle) const;
1675 
1676  bool operator()(SUnit* left, SUnit* right) const;
1677 };
1678 
1679 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1680 // scheduler.
1681 struct ilp_ls_rr_sort : public queue_sort {
1682  enum {
1683  IsBottomUp = true,
1684  HasReadyFilter = false
1685  };
1686 
1687  RegReductionPQBase *SPQ;
1688 
1689  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1690 
1691  bool isReady(SUnit *SU, unsigned CurCycle) const;
1692 
1693  bool operator()(SUnit* left, SUnit* right) const;
1694 };
1695 
1696 class RegReductionPQBase : public SchedulingPriorityQueue {
1697 protected:
1698  std::vector<SUnit *> Queue;
1699  unsigned CurQueueId = 0;
1700  bool TracksRegPressure;
1701  bool SrcOrder;
1702 
1703  // SUnits - The SUnits for the current graph.
1704  std::vector<SUnit> *SUnits;
1705 
1706  MachineFunction &MF;
1707  const TargetInstrInfo *TII;
1708  const TargetRegisterInfo *TRI;
1709  const TargetLowering *TLI;
1710  ScheduleDAGRRList *scheduleDAG = nullptr;
1711 
1712  // SethiUllmanNumbers - The SethiUllman number for each node.
1713  std::vector<unsigned> SethiUllmanNumbers;
1714 
1715  /// RegPressure - Tracking current reg pressure per register class.
1716  std::vector<unsigned> RegPressure;
1717 
1718  /// RegLimit - Tracking the number of allocatable registers per register
1719  /// class.
1720  std::vector<unsigned> RegLimit;
1721 
1722 public:
1723  RegReductionPQBase(MachineFunction &mf,
1724  bool hasReadyFilter,
1725  bool tracksrp,
1726  bool srcorder,
1727  const TargetInstrInfo *tii,
1728  const TargetRegisterInfo *tri,
1729  const TargetLowering *tli)
1730  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1731  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1732  if (TracksRegPressure) {
1733  unsigned NumRC = TRI->getNumRegClasses();
1734  RegLimit.resize(NumRC);
1735  RegPressure.resize(NumRC);
1736  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1737  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1738  for (const TargetRegisterClass *RC : TRI->regclasses())
1739  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1740  }
1741  }
1742 
1743  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1744  scheduleDAG = scheduleDag;
1745  }
1746 
1747  ScheduleHazardRecognizer* getHazardRec() {
1748  return scheduleDAG->getHazardRec();
1749  }
1750 
1751  void initNodes(std::vector<SUnit> &sunits) override;
1752 
1753  void addNode(const SUnit *SU) override;
1754 
1755  void updateNode(const SUnit *SU) override;
1756 
1757  void releaseState() override {
1758  SUnits = nullptr;
1759  SethiUllmanNumbers.clear();
1760  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1761  }
1762 
1763  unsigned getNodePriority(const SUnit *SU) const;
1764 
1765  unsigned getNodeOrdering(const SUnit *SU) const {
1766  if (!SU->getNode()) return 0;
1767 
1768  return SU->getNode()->getIROrder();
1769  }
1770 
1771  bool empty() const override { return Queue.empty(); }
1772 
1773  void push(SUnit *U) override {
1774  assert(!U->NodeQueueId && "Node in the queue already");
1775  U->NodeQueueId = ++CurQueueId;
1776  Queue.push_back(U);
1777  }
1778 
1779  void remove(SUnit *SU) override {
1780  assert(!Queue.empty() && "Queue is empty!");
1781  assert(SU->NodeQueueId != 0 && "Not in queue!");
1782  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1783  if (I != std::prev(Queue.end()))
1784  std::swap(*I, Queue.back());
1785  Queue.pop_back();
1786  SU->NodeQueueId = 0;
1787  }
1788 
1789  bool tracksRegPressure() const override { return TracksRegPressure; }
1790 
1791  void dumpRegPressure() const;
1792 
1793  bool HighRegPressure(const SUnit *SU) const;
1794 
1795  bool MayReduceRegPressure(SUnit *SU) const;
1796 
1797  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1798 
1799  void scheduledNode(SUnit *SU) override;
1800 
1801  void unscheduledNode(SUnit *SU) override;
1802 
1803 protected:
1804  bool canClobber(const SUnit *SU, const SUnit *Op);
1805  void AddPseudoTwoAddrDeps();
1806  void PrescheduleNodesWithMultipleUses();
1807  void CalculateSethiUllmanNumbers();
1808 };
1809 
1810 template<class SF>
1811 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1812  std::vector<SUnit *>::iterator Best = Q.begin();
1813  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1814  if (Picker(*Best, *I))
1815  Best = I;
1816  SUnit *V = *Best;
1817  if (Best != std::prev(Q.end()))
1818  std::swap(*Best, Q.back());
1819  Q.pop_back();
1820  return V;
1821 }
1822 
1823 template<class SF>
1824 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1825 #ifndef NDEBUG
1826  if (DAG->StressSched) {
1827  reverse_sort<SF> RPicker(Picker);
1828  return popFromQueueImpl(Q, RPicker);
1829  }
1830 #endif
1831  (void)DAG;
1832  return popFromQueueImpl(Q, Picker);
1833 }
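// The pair of helpers above selects the highest-priority element with a linear
// scan, swaps it with the back of the vector, and pops it, so nothing is ever
// erased from the middle of the queue. A minimal standalone sketch of the same
// swap-and-pop idiom on plain integers, with a "smallest value wins" picker
// standing in for SF (illustrative only, not part of the scheduler; Q must be
// non-empty, as for popFromQueueImpl above):
static int popSmallest(std::vector<int> &Q) {
  auto Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (*I < *Best) // Picker(*Best, *I): true means *I beats *Best.
      Best = I;
  int V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back()); // Move the winner to the back...
  Q.pop_back();                 // ...and remove it in O(1).
  return V;
}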
1834 
1835 //===----------------------------------------------------------------------===//
1836 // RegReductionPriorityQueue Definition
1837 //===----------------------------------------------------------------------===//
1838 //
1839 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1840 // to reduce register pressure.
1841 //
1842 template<class SF>
1843 class RegReductionPriorityQueue : public RegReductionPQBase {
1844  SF Picker;
1845 
1846 public:
1847  RegReductionPriorityQueue(MachineFunction &mf,
1848  bool tracksrp,
1849  bool srcorder,
1850  const TargetInstrInfo *tii,
1851  const TargetRegisterInfo *tri,
1852  const TargetLowering *tli)
1853  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1854  tii, tri, tli),
1855  Picker(this) {}
1856 
1857  bool isBottomUp() const override { return SF::IsBottomUp; }
1858 
1859  bool isReady(SUnit *U) const override {
1860  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1861  }
1862 
1863  SUnit *pop() override {
1864  if (Queue.empty()) return nullptr;
1865 
1866  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1867  V->NodeQueueId = 0;
1868  return V;
1869  }
1870 
1871 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1872  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1873  // Emulate pop() without clobbering NodeQueueIds.
1874  std::vector<SUnit *> DumpQueue = Queue;
1875  SF DumpPicker = Picker;
1876  while (!DumpQueue.empty()) {
1877  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1878  dbgs() << "Height " << SU->getHeight() << ": ";
1879  SU->dump(DAG);
1880  }
1881  }
1882 #endif
1883 };
1884 
1885 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1886 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1887 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1888 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1889 
1890 } // end anonymous namespace
1891 
1892 //===----------------------------------------------------------------------===//
1893 // Static Node Priority for Register Pressure Reduction
1894 //===----------------------------------------------------------------------===//
1895 
1896 // Check for special nodes that bypass scheduling heuristics.
1897 // Currently this pushes TokenFactor nodes down, but may be used for other
1898 // pseudo-ops as well.
1899 //
1900 // Return -1 to schedule right above left, 1 for left above right.
1901 // Return 0 if no bias exists.
1902 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1903  bool LSchedLow = left->isScheduleLow;
1904  bool RSchedLow = right->isScheduleLow;
1905  if (LSchedLow != RSchedLow)
1906  return LSchedLow < RSchedLow ? 1 : -1;
1907  return 0;
1908 }
1909 
1910 /// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1911 /// A smaller number means a higher priority.
1912 static unsigned
1913 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1914  if (SUNumbers[SU->NodeNum] != 0)
1915  return SUNumbers[SU->NodeNum];
1916 
1917  // Use WorkList to avoid stack overflow on excessively large IRs.
1918  struct WorkState {
1919  WorkState(const SUnit *SU) : SU(SU) {}
1920  const SUnit *SU;
1921  unsigned PredsProcessed = 0;
1922  };
1923 
1924  SmallVector<WorkState, 16> WorkList;
1925  WorkList.push_back(SU);
1926  while (!WorkList.empty()) {
1927  auto &Temp = WorkList.back();
1928  auto *TempSU = Temp.SU;
1929  bool AllPredsKnown = true;
1930  // Try to find a non-evaluated pred and push it into the processing stack.
1931  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1932  auto &Pred = TempSU->Preds[P];
1933  if (Pred.isCtrl()) continue; // ignore chain preds
1934  SUnit *PredSU = Pred.getSUnit();
1935  if (SUNumbers[PredSU->NodeNum] == 0) {
1936 #ifndef NDEBUG
1937  // In debug mode, check that we don't have such element in the stack.
1938  for (auto It : WorkList)
1939  assert(It.SU != PredSU && "Trying to push an element twice?");
1940 #endif
1941  // Next time start processing this one starting from the next pred.
1942  Temp.PredsProcessed = P + 1;
1943  WorkList.push_back(PredSU);
1944  AllPredsKnown = false;
1945  break;
1946  }
1947  }
1948 
1949  if (!AllPredsKnown)
1950  continue;
1951 
1952  // Once all preds are known, we can calculate the answer for this one.
1953  unsigned SethiUllmanNumber = 0;
1954  unsigned Extra = 0;
1955  for (const SDep &Pred : TempSU->Preds) {
1956  if (Pred.isCtrl()) continue; // ignore chain preds
1957  SUnit *PredSU = Pred.getSUnit();
1958  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1959  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1960  if (PredSethiUllman > SethiUllmanNumber) {
1961  SethiUllmanNumber = PredSethiUllman;
1962  Extra = 0;
1963  } else if (PredSethiUllman == SethiUllmanNumber)
1964  ++Extra;
1965  }
1966 
1967  SethiUllmanNumber += Extra;
1968  if (SethiUllmanNumber == 0)
1969  SethiUllmanNumber = 1;
1970  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1971  WorkList.pop_back();
1972  }
1973 
1974  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1975  return SUNumbers[SU->NodeNum];
1976 }
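// The worklist loop above computes, without recursion, the classic Sethi-Ullman
// recurrence: a node with no data predecessors gets 1; otherwise it gets the
// maximum predecessor number, plus one for each additional predecessor that
// ties that maximum. A compact recursive form of the same recurrence, shown
// only as a reference sketch (deep recursion is exactly what the worklist
// version is there to avoid):
static unsigned CalcSethiUllmanRecursive(const SUnit *SU,
                                         std::vector<unsigned> &SUNumbers) {
  unsigned &Num = SUNumbers[SU->NodeNum];
  if (Num != 0)
    return Num; // Already computed.
  unsigned Max = 0, Ties = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // Ignore chain preds.
    unsigned P = CalcSethiUllmanRecursive(Pred.getSUnit(), SUNumbers);
    if (P > Max) {
      Max = P;
      Ties = 0;
    } else if (P == Max)
      ++Ties;
  }
  Num = Max + Ties;
  if (Num == 0)
    Num = 1; // Nodes with no data preds get 1.
  return Num;
}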
1977 
1978 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1979 /// scheduling units.
1980 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1981  SethiUllmanNumbers.assign(SUnits->size(), 0);
1982 
1983  for (const SUnit &SU : *SUnits)
1984  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1985 }
1986 
1987 void RegReductionPQBase::addNode(const SUnit *SU) {
1988  unsigned SUSize = SethiUllmanNumbers.size();
1989  if (SUnits->size() > SUSize)
1990  SethiUllmanNumbers.resize(SUSize*2, 0);
1991  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1992 }
1993 
1994 void RegReductionPQBase::updateNode(const SUnit *SU) {
1995  SethiUllmanNumbers[SU->NodeNum] = 0;
1996  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1997 }
1998 
1999 // Lower priority means schedule further down. For bottom-up scheduling, lower
2000 // priority SUs are scheduled before higher priority SUs.
2001 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2002  assert(SU->NodeNum < SethiUllmanNumbers.size());
2003  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2004  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2005  // CopyToReg should be close to its uses to facilitate coalescing and
2006  // avoid spilling.
2007  return 0;
2008  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2009  Opc == TargetOpcode::SUBREG_TO_REG ||
2010  Opc == TargetOpcode::INSERT_SUBREG)
2011  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2012  // close to their uses to facilitate coalescing.
2013  return 0;
2014  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2015  // If SU does not have a register use, i.e. it doesn't produce a value
2016  // that would be consumed (e.g. store), then it terminates a chain of
2017  // computation. Give it a large SethiUllman number so it will be
2018  // scheduled right before its predecessors so that it doesn't lengthen
2019  // their live ranges.
2020  return 0xffff;
2021  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2022  // If SU does not have a register def, schedule it close to its uses
2023  // because it does not lengthen any live ranges.
2024  return 0;
2025 #if 1
2026  return SethiUllmanNumbers[SU->NodeNum];
2027 #else
2028  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2029  if (SU->isCallOp) {
2030  // FIXME: This assumes all of the defs are used as call operands.
2031  int NP = (int)Priority - SU->getNode()->getNumValues();
2032  return (NP > 0) ? NP : 0;
2033  }
2034  return Priority;
2035 #endif
2036 }
2037 
2038 //===----------------------------------------------------------------------===//
2039 // Register Pressure Tracking
2040 //===----------------------------------------------------------------------===//
2041 
2042 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2043 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2044  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2045  unsigned Id = RC->getID();
2046  unsigned RP = RegPressure[Id];
2047  if (!RP) continue;
2048  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2049  << RegLimit[Id] << '\n');
2050  }
2051 }
2052 #endif
2053 
2054 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2055  if (!TLI)
2056  return false;
2057 
2058  for (const SDep &Pred : SU->Preds) {
2059  if (Pred.isCtrl())
2060  continue;
2061  SUnit *PredSU = Pred.getSUnit();
2062  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2063  // to cover the number of registers defined (they are all live).
2064  if (PredSU->NumRegDefsLeft == 0) {
2065  continue;
2066  }
2067  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2068  RegDefPos.IsValid(); RegDefPos.Advance()) {
2069  unsigned RCId, Cost;
2070  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2071 
2072  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2073  return true;
2074  }
2075  }
2076  return false;
2077 }
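// A worked example of the check above, with illustrative numbers: if a register
// class has RegLimit 8 and RegPressure is currently 7, a predecessor that still
// has an unscheduled def of Cost 1 in that class makes (7 + 1) >= 8, so the
// node is treated as scheduling into high register pressure.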
2078 
2079 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2080  const SDNode *N = SU->getNode();
2081 
2082  if (!N->isMachineOpcode() || !SU->NumSuccs)
2083  return false;
2084 
2085  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2086  for (unsigned i = 0; i != NumDefs; ++i) {
2087  MVT VT = N->getSimpleValueType(i);
2088  if (!N->hasAnyUseOfValue(i))
2089  continue;
2090  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2091  if (RegPressure[RCId] >= RegLimit[RCId])
2092  return true;
2093  }
2094  return false;
2095 }
2096 
2097 // Compute the register pressure contribution of this instruction by counting up
2098 // for uses that are not live and down for defs. Only count register classes
2099 // that are already under high pressure. As a side effect, compute the number of
2100 // uses of registers that are already live.
2101 //
2102 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2103 // so could probably be factored.
2104 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2105  LiveUses = 0;
2106  int PDiff = 0;
2107  for (const SDep &Pred : SU->Preds) {
2108  if (Pred.isCtrl())
2109  continue;
2110  SUnit *PredSU = Pred.getSUnit();
2111  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2112  // to cover the number of registers defined (they are all live).
2113  if (PredSU->NumRegDefsLeft == 0) {
2114  if (PredSU->getNode()->isMachineOpcode())
2115  ++LiveUses;
2116  continue;
2117  }
2118  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2119  RegDefPos.IsValid(); RegDefPos.Advance()) {
2120  MVT VT = RegDefPos.GetValue();
2121  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2122  if (RegPressure[RCId] >= RegLimit[RCId])
2123  ++PDiff;
2124  }
2125  }
2126  const SDNode *N = SU->getNode();
2127 
2128  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2129  return PDiff;
2130 
2131  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2132  for (unsigned i = 0; i != NumDefs; ++i) {
2133  MVT VT = N->getSimpleValueType(i);
2134  if (!N->hasAnyUseOfValue(i))
2135  continue;
2136  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2137  if (RegPressure[RCId] >= RegLimit[RCId])
2138  --PDiff;
2139  }
2140  return PDiff;
2141 }
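// A worked example of the diff above, with illustrative numbers: with a single
// register class at its limit (RegPressure == RegLimit), a node whose two data
// predecessors each still have one unscheduled def in that class, and which
// itself defines one value in that class that has uses, gets
// PDiff = +2 - 1 = +1, i.e. scheduling it is expected to add one live register
// to an already saturated class.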
2142 
2143 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2144  if (!TracksRegPressure)
2145  return;
2146 
2147  if (!SU->getNode())
2148  return;
2149 
2150  for (const SDep &Pred : SU->Preds) {
2151  if (Pred.isCtrl())
2152  continue;
2153  SUnit *PredSU = Pred.getSUnit();
2154  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2155  // to cover the number of registers defined (they are all live).
2156  if (PredSU->NumRegDefsLeft == 0) {
2157  continue;
2158  }
2159  // FIXME: The ScheduleDAG currently loses information about which of a
2160  // node's values is consumed by each dependence. Consequently, if the node
2161  // defines multiple register classes, we don't know which to pressurize
2162  // here. Instead the following loop consumes the register defs in an
2163  // arbitrary order. At least it handles the common case of clustered loads
2164  // to the same class. For precise liveness, each SDep needs to indicate the
2165  // result number. But that tightly couples the ScheduleDAG with the
2166  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2167  // value type or register class to SDep.
2168  //
2169  // The most important aspect of register tracking is balancing the increase
2170  // here with the reduction further below. Note that this SU may use multiple
2171  // defs in PredSU. They can't be determined here, but we've already
2172  // compensated by reducing NumRegDefsLeft in PredSU during
2173  // ScheduleDAGSDNodes::AddSchedEdges.
2174  --PredSU->NumRegDefsLeft;
2175  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2176  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2177  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2178  if (SkipRegDefs)
2179  continue;
2180 
2181  unsigned RCId, Cost;
2182  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2183  RegPressure[RCId] += Cost;
2184  break;
2185  }
2186  }
2187 
2188  // We should have this assert, but there may be dead SDNodes that never
2189  // materialize as SUnits, so they don't appear to generate liveness.
2190  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2191  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2192  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2193  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2194  if (SkipRegDefs > 0)
2195  continue;
2196  unsigned RCId, Cost;
2197  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2198  if (RegPressure[RCId] < Cost) {
2199  // Register pressure tracking is imprecise. This can happen. But we try
2200  // hard not to let it happen because it likely results in poor scheduling.
2201  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2202  << ") has too many regdefs\n");
2203  RegPressure[RCId] = 0;
2204  }
2205  else {
2206  RegPressure[RCId] -= Cost;
2207  }
2208  }
2209  LLVM_DEBUG(dumpRegPressure());
2210 }
2211 
2212 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2213  if (!TracksRegPressure)
2214  return;
2215 
2216  const SDNode *N = SU->getNode();
2217  if (!N) return;
2218 
2219  if (!N->isMachineOpcode()) {
2220  if (N->getOpcode() != ISD::CopyToReg)
2221  return;
2222  } else {
2223  unsigned Opc = N->getMachineOpcode();
2224  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2225  Opc == TargetOpcode::INSERT_SUBREG ||
2226  Opc == TargetOpcode::SUBREG_TO_REG ||
2227  Opc == TargetOpcode::REG_SEQUENCE ||
2228  Opc == TargetOpcode::IMPLICIT_DEF)
2229  return;
2230  }
2231 
2232  for (const SDep &Pred : SU->Preds) {
2233  if (Pred.isCtrl())
2234  continue;
2235  SUnit *PredSU = Pred.getSUnit();
2236  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2237  // counts data deps.
2238  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2239  continue;
2240  const SDNode *PN = PredSU->getNode();
2241  if (!PN->isMachineOpcode()) {
2242  if (PN->getOpcode() == ISD::CopyFromReg) {
2243  MVT VT = PN->getSimpleValueType(0);
2244  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2245  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2246  }
2247  continue;
2248  }
2249  unsigned POpc = PN->getMachineOpcode();
2250  if (POpc == TargetOpcode::IMPLICIT_DEF)
2251  continue;
2252  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2253  POpc == TargetOpcode::INSERT_SUBREG ||
2254  POpc == TargetOpcode::SUBREG_TO_REG) {
2255  MVT VT = PN->getSimpleValueType(0);
2256  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2257  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2258  continue;
2259  }
2260  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2261  for (unsigned i = 0; i != NumDefs; ++i) {
2262  MVT VT = PN->getSimpleValueType(i);
2263  if (!PN->hasAnyUseOfValue(i))
2264  continue;
2265  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2266  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2267  // Register pressure tracking is imprecise. This can happen.
2268  RegPressure[RCId] = 0;
2269  else
2270  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2271  }
2272  }
2273 
2274  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2275  // may transfer data dependencies to CopyToReg.
2276  if (SU->NumSuccs && N->isMachineOpcode()) {
2277  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2278  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2279  MVT VT = N->getSimpleValueType(i);
2280  if (VT == MVT::Glue || VT == MVT::Other)
2281  continue;
2282  if (!N->hasAnyUseOfValue(i))
2283  continue;
2284  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2285  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2286  }
2287  }
2288 
2289  LLVM_DEBUG(dumpRegPressure());
2290 }
2291 
2292 //===----------------------------------------------------------------------===//
2293 // Dynamic Node Priority for Register Pressure Reduction
2294 //===----------------------------------------------------------------------===//
2295 
2296 /// closestSucc - Returns the scheduled cycle of the successor which is
2297 /// closest to the current cycle.
2298 static unsigned closestSucc(const SUnit *SU) {
2299  unsigned MaxHeight = 0;
2300  for (const SDep &Succ : SU->Succs) {
2301  if (Succ.isCtrl()) continue; // ignore chain succs
2302  unsigned Height = Succ.getSUnit()->getHeight();
2303  // If there are a bunch of CopyToRegs stacked up, they should be considered
2304  // to be at the same position.
2305  if (Succ.getSUnit()->getNode() &&
2306  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2307  Height = closestSucc(Succ.getSUnit())+1;
2308  if (Height > MaxHeight)
2309  MaxHeight = Height;
2310  }
2311  return MaxHeight;
2312 }
2313 
2314 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2315 /// for scratch registers, i.e. number of data dependencies.
2316 static unsigned calcMaxScratches(const SUnit *SU) {
2317  unsigned Scratches = 0;
2318  for (const SDep &Pred : SU->Preds) {
2319  if (Pred.isCtrl()) continue; // ignore chain preds
2320  Scratches++;
2321  }
2322  return Scratches;
2323 }
2324 
2325 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2326 /// CopyFromReg from a virtual register.
2327 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2328  bool RetVal = false;
2329  for (const SDep &Pred : SU->Preds) {
2330  if (Pred.isCtrl()) continue;
2331  const SUnit *PredSU = Pred.getSUnit();
2332  if (PredSU->getNode() &&
2333  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2334  unsigned Reg =
2335  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2336  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2337  RetVal = true;
2338  continue;
2339  }
2340  }
2341  return false;
2342  }
2343  return RetVal;
2344 }
2345 
2346 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2347 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2348 /// it has no other use. It should be scheduled closer to the terminator.
2349 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2350  bool RetVal = false;
2351  for (const SDep &Succ : SU->Succs) {
2352  if (Succ.isCtrl()) continue;
2353  const SUnit *SuccSU = Succ.getSUnit();
2354  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2355  unsigned Reg =
2356  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2357  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2358  RetVal = true;
2359  continue;
2360  }
2361  }
2362  return false;
2363  }
2364  return RetVal;
2365 }
2366 
2367 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2368 // set isVRegCycle for its CopyFromReg operands.
2369 //
2370 // This is only relevant for single-block loops, in which case the VRegCycle
2371 // node is likely an induction variable in which the operand and target virtual
2372 // registers should be coalesced (e.g. pre/post increment values). Setting the
2373 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2374 // CopyFromReg so that this node becomes the virtual register "kill". This
2375 // avoids interference between the values live in and out of the block and
2376 // eliminates a copy inside the loop.
2377 static void initVRegCycle(SUnit *SU) {
2378  if (DisableSchedVRegCycle)
2379  return;
2380 
2381  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2382  return;
2383 
2384  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2385 
2386  SU->isVRegCycle = true;
2387 
2388  for (const SDep &Pred : SU->Preds) {
2389  if (Pred.isCtrl()) continue;
2390  Pred.getSUnit()->isVRegCycle = true;
2391  }
2392 }
2393 
2394 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2395 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2396 static void resetVRegCycle(SUnit *SU) {
2397  if (!SU->isVRegCycle)
2398  return;
2399 
2400  for (const SDep &Pred : SU->Preds) {
2401  if (Pred.isCtrl()) continue; // ignore chain preds
2402  SUnit *PredSU = Pred.getSUnit();
2403  if (PredSU->isVRegCycle) {
2404  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2405  "VRegCycle def must be CopyFromReg");
2406  Pred.getSUnit()->isVRegCycle = false;
2407  }
2408  }
2409 }
2410 
2411 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2412 // means a node that defines the VRegCycle has not been scheduled yet.
2413 static bool hasVRegCycleUse(const SUnit *SU) {
2414  // If this SU also defines the VReg, don't hoist it as a "use".
2415  if (SU->isVRegCycle)
2416  return false;
2417 
2418  for (const SDep &Pred : SU->Preds) {
2419  if (Pred.isCtrl()) continue; // ignore chain preds
2420  if (Pred.getSUnit()->isVRegCycle &&
2421  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2422  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2423  return true;
2424  }
2425  }
2426  return false;
2427 }
2428 
2429 // Check for either a dependence (latency) or resource (hazard) stall.
2430 //
2431 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2432 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2433  if ((int)SPQ->getCurCycle() < Height) return true;
2434  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2435  != ScheduleHazardRecognizer::NoHazard)
2436  return true;
2437  return false;
2438 }
2439 
2440 // Return -1 if left has higher priority, 1 if right has higher priority.
2441 // Return 0 if latency-based priority is equivalent.
2442 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2443  RegReductionPQBase *SPQ) {
2444  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2445  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2446  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2447  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2448  int LHeight = (int)left->getHeight() + LPenalty;
2449  int RHeight = (int)right->getHeight() + RPenalty;
2450 
2451  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2452  BUHasStall(left, LHeight, SPQ);
2453  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2454  BUHasStall(right, RHeight, SPQ);
2455 
2456  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2457  // If scheduling both of the nodes will cause pipeline stalls, sort them
2458  // according to their height.
2459  if (LStall) {
2460  if (!RStall)
2461  return 1;
2462  if (LHeight != RHeight)
2463  return LHeight > RHeight ? 1 : -1;
2464  } else if (RStall)
2465  return -1;
2466 
2467  // If either node is scheduling for latency, sort them by height/depth
2468  // and latency.
2469  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2470  right->SchedulingPref == Sched::ILP)) {
2471  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2472  // is enabled, grouping instructions by cycle, then height is already
2473  // covered so only depth matters. We also reach this point if both stall
2474  // but have the same height.
2475  if (!SPQ->getHazardRec()->isEnabled()) {
2476  if (LHeight != RHeight)
2477  return LHeight > RHeight ? 1 : -1;
2478  }
2479  int LDepth = left->getDepth() - LPenalty;
2480  int RDepth = right->getDepth() - RPenalty;
2481  if (LDepth != RDepth) {
2482  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2483  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2484  << ") depth " << RDepth << "\n");
2485  return LDepth < RDepth ? 1 : -1;
2486  }
2487  if (left->Latency != right->Latency)
2488  return left->Latency > right->Latency ? 1 : -1;
2489  }
2490  return 0;
2491 }
2492 
2493 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2494  // Schedule physical register definitions close to their use. This is
2495  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2496  // long as shortening physreg live ranges is generally good, we can defer
2497  // creating a subtarget hook.
2498  if (!DisableSchedPhysRegJoin) {
2499  bool LHasPhysReg = left->hasPhysRegDefs;
2500  bool RHasPhysReg = right->hasPhysRegDefs;
2501  if (LHasPhysReg != RHasPhysReg) {
2502  #ifndef NDEBUG
2503  static const char *const PhysRegMsg[] = { " has no physreg",
2504  " defines a physreg" };
2505  #endif
2506  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2507  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2508  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2509  return LHasPhysReg < RHasPhysReg;
2510  }
2511  }
2512 
2513  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2514  unsigned LPriority = SPQ->getNodePriority(left);
2515  unsigned RPriority = SPQ->getNodePriority(right);
2516 
2517  // Be really careful about hoisting call operands above previous calls.
2518  // Only allow it if it would reduce register pressure.
2519  if (left->isCall && right->isCallOp) {
2520  unsigned RNumVals = right->getNode()->getNumValues();
2521  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2522  }
2523  if (right->isCall && left->isCallOp) {
2524  unsigned LNumVals = left->getNode()->getNumValues();
2525  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2526  }
2527 
2528  if (LPriority != RPriority)
2529  return LPriority > RPriority;
2530 
2531  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2532  // the same, then keep source order.
2533  if (left->isCall || right->isCall) {
2534  unsigned LOrder = SPQ->getNodeOrdering(left);
2535  unsigned ROrder = SPQ->getNodeOrdering(right);
2536 
2537  // Prefer an ordering where the lower the non-zero order number, the higher
2538  // the preference.
2539  if ((LOrder || ROrder) && LOrder != ROrder)
2540  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2541  }
2542 
2543  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2544  // e.g.
2545  // t1 = op t2, c1
2546  // t3 = op t4, c2
2547  //
2548  // and the following instructions are both ready.
2549  // t2 = op c3
2550  // t4 = op c4
2551  //
2552  // Then schedule t2 = op first.
2553  // i.e.
2554  // t4 = op c4
2555  // t2 = op c3
2556  // t1 = op t2, c1
2557  // t3 = op t4, c2
2558  //
2559  // This creates more short live intervals.
2560  unsigned LDist = closestSucc(left);
2561  unsigned RDist = closestSucc(right);
2562  if (LDist != RDist)
2563  return LDist < RDist;
2564 
2565  // How many registers become live when the node is scheduled.
2566  unsigned LScratch = calcMaxScratches(left);
2567  unsigned RScratch = calcMaxScratches(right);
2568  if (LScratch != RScratch)
2569  return LScratch > RScratch;
2570 
2571  // Comparing latency against a call makes little sense unless the node
2572  // is register pressure-neutral.
2573  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2574  return (left->NodeQueueId > right->NodeQueueId);
2575 
2576  // Do not compare latencies when one or both of the nodes are calls.
2577  if (!DisableSchedCycles &&
2578  !(left->isCall || right->isCall)) {
2579  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2580  if (result != 0)
2581  return result > 0;
2582  }
2583  else {
2584  if (left->getHeight() != right->getHeight())
2585  return left->getHeight() > right->getHeight();
2586 
2587  if (left->getDepth() != right->getDepth())
2588  return left->getDepth() < right->getDepth();
2589  }
2590 
2591  assert(left->NodeQueueId && right->NodeQueueId &&
2592  "NodeQueueId cannot be zero");
2593  return (left->NodeQueueId > right->NodeQueueId);
2594 }
2595 
2596 // Bottom up
2597 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2598  if (int res = checkSpecialNodes(left, right))
2599  return res > 0;
2600 
2601  return BURRSort(left, right, SPQ);
2602 }
2603 
2604 // Source order, otherwise bottom up.
2605 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2606  if (int res = checkSpecialNodes(left, right))
2607  return res > 0;
2608 
2609  unsigned LOrder = SPQ->getNodeOrdering(left);
2610  unsigned ROrder = SPQ->getNodeOrdering(right);
2611 
2612  // Prefer an ordering where the lower the non-zero order number, the higher
2613  // the preference.
2614  if ((LOrder || ROrder) && LOrder != ROrder)
2615  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2616 
2617  return BURRSort(left, right, SPQ);
2618 }
2619 
2620 // If the time between now and when the instruction will be ready can cover
2621 // the spill code, then avoid adding it to the ready queue. This gives long
2622 // stalls highest priority and allows hoisting across calls. It should also
2623 // speed up processing the available queue.
2624 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2625  static const unsigned ReadyDelay = 3;
2626 
2627  if (SPQ->MayReduceRegPressure(SU)) return true;
2628 
2629  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2630 
2631  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2632  != ScheduleHazardRecognizer::NoHazard)
2633  return false;
2634 
2635  return true;
2636 }
2637 
2638 // Return true if right should be scheduled with higher priority than left.
2639 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2640  if (int res = checkSpecialNodes(left, right))
2641  return res > 0;
2642 
2643  if (left->isCall || right->isCall)
2644  // No way to compute latency of calls.
2645  return BURRSort(left, right, SPQ);
2646 
2647  bool LHigh = SPQ->HighRegPressure(left);
2648  bool RHigh = SPQ->HighRegPressure(right);
2649  // Avoid causing spills. If register pressure is high, schedule for
2650  // register pressure reduction.
2651  if (LHigh && !RHigh) {
2652  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2653  << right->NodeNum << ")\n");
2654  return true;
2655  }
2656  else if (!LHigh && RHigh) {
2657  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2658  << left->NodeNum << ")\n");
2659  return false;
2660  }
2661  if (!LHigh && !RHigh) {
2662  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2663  if (result != 0)
2664  return result > 0;
2665  }
2666  return BURRSort(left, right, SPQ);
2667 }
2668 
2669 // Schedule as many instructions in each cycle as possible. So don't make an
2670 // instruction available unless it is ready in the current cycle.
2671 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2672  if (SU->getHeight() > CurCycle) return false;
2673 
2674  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2675  != ScheduleHazardRecognizer::NoHazard)
2676  return false;
2677 
2678  return true;
2679 }
2680 
2681 static bool canEnableCoalescing(SUnit *SU) {
2682  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2683  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2684  // CopyToReg should be close to its uses to facilitate coalescing and
2685  // avoid spilling.
2686  return true;
2687 
2688  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2689  Opc == TargetOpcode::SUBREG_TO_REG ||
2690  Opc == TargetOpcode::INSERT_SUBREG)
2691  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2692  // close to their uses to facilitate coalescing.
2693  return true;
2694 
2695  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2696  // If SU does not have a register def, schedule it close to its uses
2697  // because it does not lengthen any live ranges.
2698  return true;
2699 
2700  return false;
2701 }
2702 
2703 // list-ilp is currently an experimental scheduler that allows various
2704 // heuristics to be enabled prior to the normal register reduction logic.
2705 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2706  if (int res = checkSpecialNodes(left, right))
2707  return res > 0;
2708 
2709  if (left->isCall || right->isCall)
2710  // No way to compute latency of calls.
2711  return BURRSort(left, right, SPQ);
2712 
2713  unsigned LLiveUses = 0, RLiveUses = 0;
2714  int LPDiff = 0, RPDiff = 0;
2715  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2716  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2717  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2718  }
2719  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2720  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2721  << "): " << LPDiff << " != SU(" << right->NodeNum
2722  << "): " << RPDiff << "\n");
2723  return LPDiff > RPDiff;
2724  }
2725 
2726  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2727  bool LReduce = canEnableCoalescing(left);
2728  bool RReduce = canEnableCoalescing(right);
2729  if (LReduce && !RReduce) return false;
2730  if (RReduce && !LReduce) return true;
2731  }
2732 
2733  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2734  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2735  << " != SU(" << right->NodeNum << "): " << RLiveUses
2736  << "\n");
2737  return LLiveUses < RLiveUses;
2738  }
2739 
2740  if (!DisableSchedStalls) {
2741  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2742  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2743  if (LStall != RStall)
2744  return left->getHeight() > right->getHeight();
2745  }
2746 
2747  if (!DisableSchedCriticalPath) {
2748  int spread = (int)left->getDepth() - (int)right->getDepth();
2749  if (std::abs(spread) > MaxReorderWindow) {
2750  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2751  << left->getDepth() << " != SU(" << right->NodeNum
2752  << "): " << right->getDepth() << "\n");
2753  return left->getDepth() < right->getDepth();
2754  }
2755  }
2756 
2757  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2758  int spread = (int)left->getHeight() - (int)right->getHeight();
2759  if (std::abs(spread) > MaxReorderWindow)
2760  return left->getHeight() > right->getHeight();
2761  }
2762 
2763  return BURRSort(left, right, SPQ);
2764 }
2765 
2766 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2767  SUnits = &sunits;
2768  // Add pseudo dependency edges for two-address nodes.
2769  if (!Disable2AddrHack)
2770  AddPseudoTwoAddrDeps();
2771  // Reroute edges to nodes with multiple uses.
2772  if (!TracksRegPressure && !SrcOrder)
2773  PrescheduleNodesWithMultipleUses();
2774  // Calculate node priorities.
2775  CalculateSethiUllmanNumbers();
2776 
2777  // For single block loops, mark nodes that look like canonical IV increments.
2778  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2779  for (SUnit &SU : sunits)
2780  initVRegCycle(&SU);
2781 }
2782 
2783 //===----------------------------------------------------------------------===//
2784 // Preschedule for Register Pressure
2785 //===----------------------------------------------------------------------===//
2786 
2787 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2788  if (SU->isTwoAddress) {
2789  unsigned Opc = SU->getNode()->getMachineOpcode();
2790  const MCInstrDesc &MCID = TII->get(Opc);
2791  unsigned NumRes = MCID.getNumDefs();
2792  unsigned NumOps = MCID.getNumOperands() - NumRes;
2793  for (unsigned i = 0; i != NumOps; ++i) {
2794  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2795  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2796  if (DU->getNodeId() != -1 &&
2797  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2798  return true;
2799  }
2800  }
2801  }
2802  return false;
2803 }
2804 
2805 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2806 /// successor's explicit physregs whose definition can reach DepSU.
2807 /// i.e. DepSU should not be scheduled above SU.
2808 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2809  ScheduleDAGRRList *scheduleDAG,
2810  const TargetInstrInfo *TII,
2811  const TargetRegisterInfo *TRI) {
2812  const MCPhysReg *ImpDefs
2813  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2814  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2815  if(!ImpDefs && !RegMask)
2816  return false;
2817 
2818  for (const SDep &Succ : SU->Succs) {
2819  SUnit *SuccSU = Succ.getSUnit();
2820  for (const SDep &SuccPred : SuccSU->Preds) {
2821  if (!SuccPred.isAssignedRegDep())
2822  continue;
2823 
2824  if (RegMask &&
2825  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2826  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2827  return true;
2828 
2829  if (ImpDefs)
2830  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2831  // Return true if SU clobbers this physical register use and the
2832  // definition of the register reaches from DepSU. IsReachable queries
2833  // a topological forward sort of the DAG (following the successors).
2834  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2835  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2836  return true;
2837  }
2838  }
2839  return false;
2840 }
2841 
2842 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2843 /// physical register defs.
2844 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2845  const TargetInstrInfo *TII,
2846  const TargetRegisterInfo *TRI) {
2847  SDNode *N = SuccSU->getNode();
2848  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2849  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2850  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2851  for (const SDNode *SUNode = SU->getNode(); SUNode;
2852  SUNode = SUNode->getGluedNode()) {
2853  if (!SUNode->isMachineOpcode())
2854  continue;
2855  const MCPhysReg *SUImpDefs =
2856  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2857  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2858  if (!SUImpDefs && !SURegMask)
2859  continue;
2860  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2861  MVT VT = N->getSimpleValueType(i);
2862  if (VT == MVT::Glue || VT == MVT::Other)
2863  continue;
2864  if (!N->hasAnyUseOfValue(i))
2865  continue;
2866  unsigned Reg = ImpDefs[i - NumDefs];
2867  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2868  return true;
2869  if (!SUImpDefs)
2870  continue;
2871  for (;*SUImpDefs; ++SUImpDefs) {
2872  unsigned SUReg = *SUImpDefs;
2873  if (TRI->regsOverlap(Reg, SUReg))
2874  return true;
2875  }
2876  }
2877  }
2878  return false;
2879 }
2880 
2881 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2882 /// are not handled well by the general register pressure reduction
2883 /// heuristics. When presented with code like this:
2884 ///
2885 ///      N
2886 ///     / |
2887 ///    /  |
2888 ///   U  store
2889 ///   |
2890 ///  ...
2891 ///
2892 /// the heuristics tend to push the store up, but since the
2893 /// operand of the store has another use (U), this would increase
2894 /// the length of that other use (the U->N edge).
2895 ///
2896 /// This function transforms code like the above to route U's
2897 /// dependence through the store when possible, like this:
2898 ///
2899 ///      N
2900 ///      ||
2901 ///      ||
2902 ///     store
2903 ///      |
2904 ///      U
2905 ///      |
2906 ///     ...
2907 ///
2908 /// This results in the store being scheduled immediately
2909 /// after N, which shortens the U->N live range, reducing
2910 /// register pressure.
2911 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2912  // Visit all the nodes in topological order, working top-down.
2913  for (SUnit &SU : *SUnits) {
2914  // For now, only look at nodes with no data successors, such as stores.
2915  // These are especially important, due to the heuristics in
2916  // getNodePriority for nodes with no data successors.
2917  if (SU.NumSuccs != 0)
2918  continue;
2919  // For now, only look at nodes with exactly one data predecessor.
2920  if (SU.NumPreds != 1)
2921  continue;
2922  // Avoid prescheduling copies to virtual registers, which don't behave
2923  // like other nodes from the perspective of scheduling heuristics.
2924  if (SDNode *N = SU.getNode())
2925  if (N->getOpcode() == ISD::CopyToReg &&
2926  TargetRegisterInfo::isVirtualRegister
2927  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2928  continue;
2929 
2930  // Locate the single data predecessor.
2931  SUnit *PredSU = nullptr;
2932  for (const SDep &Pred : SU.Preds)
2933  if (!Pred.isCtrl()) {
2934  PredSU = Pred.getSUnit();
2935  break;
2936  }
2937  assert(PredSU);
2938 
2939  // Don't rewrite edges that carry physregs, because that requires additional
2940  // support infrastructure.
2941  if (PredSU->hasPhysRegDefs)
2942  continue;
2943  // Short-circuit the case where SU is PredSU's only data successor.
2944  if (PredSU->NumSuccs == 1)
2945  continue;
2946  // Avoid prescheduling to copies from virtual registers, which don't behave
2947  // like other nodes from the perspective of scheduling heuristics.
2948  if (SDNode *N = SU.getNode())
2949  if (N->getOpcode() == ISD::CopyFromReg &&
2950  TargetRegisterInfo::isVirtualRegister
2951  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2952  continue;
2953 
2954  // Perform checks on the successors of PredSU.
2955  for (const SDep &PredSucc : PredSU->Succs) {
2956  SUnit *PredSuccSU = PredSucc.getSUnit();
2957  if (PredSuccSU == &SU) continue;
2958  // If PredSU has another successor with no data successors, for
2959  // now don't attempt to choose either over the other.
2960  if (PredSuccSU->NumSuccs == 0)
2961  goto outer_loop_continue;
2962  // Don't break physical register dependencies.
2963  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2964  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2965  goto outer_loop_continue;
2966  // Don't introduce graph cycles.
2967  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2968  goto outer_loop_continue;
2969  }
2970 
2971  // Ok, the transformation is safe and the heuristics suggest it is
2972  // profitable. Update the graph.
2973  LLVM_DEBUG(
2974  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
2975  << PredSU->NodeNum
2976  << " to guide scheduling in the presence of multiple uses\n");
2977  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2978  SDep Edge = PredSU->Succs[i];
2979  assert(!Edge.isAssignedRegDep());
2980  SUnit *SuccSU = Edge.getSUnit();
2981  if (SuccSU != &SU) {
2982  Edge.setSUnit(PredSU);
2983  scheduleDAG->RemovePred(SuccSU, Edge);
2984  scheduleDAG->AddPred(&SU, Edge);
2985  Edge.setSUnit(&SU);
2986  scheduleDAG->AddPred(SuccSU, Edge);
2987  --i;
2988  }
2989  }
2990  outer_loop_continue:;
2991  }
2992 }
2993 
2994 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2995 /// it as a def&use operand, add a pseudo control edge from it to the other
2996 /// node (if it won't create a cycle) so the two-address one will be scheduled
2997 /// first (lower in the schedule). If both nodes are two-address, favor the
2998 /// one that has a CopyToReg use (more likely to be a loop induction update).
2999 /// If both are two-address, but one is commutable while the other is not
3000 /// commutable, favor the one that's not commutable.
3001 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3002  for (SUnit &SU : *SUnits) {
3003  if (!SU.isTwoAddress)
3004  continue;
3005 
3006  SDNode *Node = SU.getNode();
3007  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3008  continue;
3009 
3010  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3011  unsigned Opc = Node->getMachineOpcode();
3012  const MCInstrDesc &MCID = TII->get(Opc);
3013  unsigned NumRes = MCID.getNumDefs();
3014  unsigned NumOps = MCID.getNumOperands() - NumRes;
3015  for (unsigned j = 0; j != NumOps; ++j) {
3016  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3017  continue;
3018  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3019  if (DU->getNodeId() == -1)
3020  continue;
3021  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3022  if (!DUSU)
3023  continue;
3024  for (const SDep &Succ : DUSU->Succs) {
3025  if (Succ.isCtrl())
3026  continue;
3027  SUnit *SuccSU = Succ.getSUnit();
3028  if (SuccSU == &SU)
3029  continue;
3030  // Be conservative. Ignore if nodes aren't at roughly the same
3031  // depth and height.
3032  if (SuccSU->getHeight() < SU.getHeight() &&
3033  (SU.getHeight() - SuccSU->getHeight()) > 1)
3034  continue;
3035  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3036  // constrains whatever is using the copy, instead of the copy
3037  // itself. In the case that the copy is coalesced, this
3038  // preserves the intent of the pseudo two-address heuristics.
3039  while (SuccSU->Succs.size() == 1 &&
3040  SuccSU->getNode()->isMachineOpcode() &&
3041  SuccSU->getNode()->getMachineOpcode() ==
3042  TargetOpcode::COPY_TO_REGCLASS)
3043  SuccSU = SuccSU->Succs.front().getSUnit();
3044  // Don't constrain non-instruction nodes.
3045  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3046  continue;
3047  // Don't constrain nodes with physical register defs if the
3048  // predecessor can clobber them.
3049  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3050  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3051  continue;
3052  }
3053  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3054  // these may be coalesced away. We want them close to their uses.
3055  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3056  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3057  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3058  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3059  continue;
3060  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3061  (!canClobber(SuccSU, DUSU) ||
3062  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3063  (!SU.isCommutable && SuccSU->isCommutable)) &&
3064  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3065  LLVM_DEBUG(dbgs()
3066  << " Adding a pseudo-two-addr edge from SU #"
3067  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3068  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3069  }
3070  }
3071  }
3072  }
3073 }
3074 
3075 //===----------------------------------------------------------------------===//
3076 // Public Constructor Functions
3077 //===----------------------------------------------------------------------===//
3078 
3079 ScheduleDAGSDNodes *
3080 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3081  CodeGenOpt::Level OptLevel) {
3082  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3083  const TargetInstrInfo *TII = STI.getInstrInfo();
3084  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3085 
3086  BURegReductionPriorityQueue *PQ =
3087  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3088  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3089  PQ->setScheduleDAG(SD);
3090  return SD;
3091 }
3092 
3093 ScheduleDAGSDNodes *
3094 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3095  CodeGenOpt::Level OptLevel) {
3096  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3097  const TargetInstrInfo *TII = STI.getInstrInfo();
3098  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3099 
3100  SrcRegReductionPriorityQueue *PQ =
3101  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3102  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3103  PQ->setScheduleDAG(SD);
3104  return SD;
3105 }
3106 
3107 ScheduleDAGSDNodes *
3108 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3109  CodeGenOpt::Level OptLevel) {
3110  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3111  const TargetInstrInfo *TII = STI.getInstrInfo();
3112  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3113  const TargetLowering *TLI = IS->TLI;
3114 
3115  HybridBURRPriorityQueue *PQ =
3116  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3117 
3118  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3119  PQ->setScheduleDAG(SD);
3120  return SD;
3121 }
3122 
3123 ScheduleDAGSDNodes *
3124 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3125  CodeGenOpt::Level OptLevel) {
3126  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3127  const TargetInstrInfo *TII = STI.getInstrInfo();
3128  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3129  const TargetLowering *TLI = IS->TLI;
3130 
3131  ILPBURRPriorityQueue *PQ =
3132  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3133  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3134  PQ->setScheduleDAG(SD);
3135  return SD;
3136 }
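All four factory functions above share one wiring pattern: query the subtarget for TII and TRI (and TLI when register pressure is tracked), construct the flavor-specific priority queue, construct a ScheduleDAGRRList over it, and hand the queue a back-pointer via setScheduleDAG so its comparison heuristics can consult the scheduler. The sketch below factors that shared shape into a hypothetical file-local helper; the name makeRRListScheduler is invented, and it assumes all four queue types keep the (MF, tracksrp, srcorder, TII, TRI, TLI) constructor shape seen in the factories above.

// Hypothetical helper, not in the file; shown only to factor out the
// construction pattern common to the four factories above.
template <typename QueueT>
static ScheduleDAGRRList *makeRRListScheduler(SelectionDAGISel *IS,
                                              CodeGenOpt::Level OptLevel,
                                              bool TracksRegPressure,
                                              bool SrcOrder,
                                              const TargetLowering *TLI) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  // The queue owns the priority heuristic, the list scheduler owns the
  // algorithm; each needs a pointer to the other.
  QueueT *PQ = new QueueT(*IS->MF, TracksRegPressure, SrcOrder, TII, TRI, TLI);
  // In the four factories above, the second ScheduleDAGRRList argument
  // happens to match the pressure-tracking flag, so it is reused here.
  ScheduleDAGRRList *SD =
      new ScheduleDAGRRList(*IS->MF, TracksRegPressure, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

// For example, createILPListDAGScheduler would reduce to:
//   return makeRRListScheduler<ILPBURRPriorityQueue>(
//       IS, OptLevel, /*TracksRegPressure=*/true, /*SrcOrder=*/false, IS->TLI);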