1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements bottom-up and top-down register pressure reduction list
10 // schedulers, using standard algorithms. The basic approach uses a priority
11 // queue of available nodes to schedule. One at a time, nodes are taken from
12 // the priority queue (thus in priority order), checked for legality to
13 // schedule, and emitted if legal.
14 //
15 //===----------------------------------------------------------------------===//
16 
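// High-level sketch (approximate; see ListScheduleBottomUp() later in this
// file for the real loop). The bottom-up scheduler behaves roughly like:
//
//   push the DAG root onto AvailableQueue;
//   while (AvailableQueue or the interference list is non-empty) {
//     SU = PickNodeToScheduleBottomUp();  // highest-priority node legal to issue
//     AdvancePastStalls(SU);              // honor latency/hazards if enabled
//     ScheduleNodeBottomUp(SU);           // emit SU, then release its predecessors
//   }
//   reverse Sequence, since it was built bottom-up;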
17 #include "ScheduleDAGSDNodes.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineOperand.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/ScheduleDAG.h"
29 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
30 #include "llvm/CodeGen/SchedulerRegistry.h"
31 #include "llvm/CodeGen/SelectionDAGISel.h"
32 #include "llvm/CodeGen/SelectionDAGNodes.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetLowering.h"
35 #include "llvm/CodeGen/TargetOpcodes.h"
36 #include "llvm/CodeGen/TargetRegisterInfo.h"
37 #include "llvm/CodeGen/TargetSubtargetInfo.h"
38 #include "llvm/Config/llvm-config.h"
39 #include "llvm/IR/InlineAsm.h"
40 #include "llvm/MC/MCInstrDesc.h"
41 #include "llvm/MC/MCRegisterInfo.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CodeGen.h"
44 #include "llvm/Support/CommandLine.h"
45 #include "llvm/Support/Compiler.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/raw_ostream.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <cstdlib>
54 #include <iterator>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 #include <vector>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "pre-RA-sched"
63 
64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65 STATISTIC(NumUnfolds, "Number of nodes unfolded");
66 STATISTIC(NumDups, "Number of duplicated nodes");
67 STATISTIC(NumPRCopies, "Number of physical register copies");
68 
69 static RegisterScheduler
70  burrListDAGScheduler("list-burr",
71  "Bottom-up register reduction list scheduling",
72  createBURRListDAGScheduler);
73 
74 static RegisterScheduler
75  sourceListDAGScheduler("source",
76  "Similar to list-burr but schedules in source "
77  "order when possible",
78  createSourceListDAGScheduler);
79 
80 static RegisterScheduler
81  hybridListDAGScheduler("list-hybrid",
82  "Bottom-up register pressure aware list scheduling "
83  "which tries to balance latency and register pressure",
84  createHybridListDAGScheduler);
85 
86 static RegisterScheduler
87  ILPListDAGScheduler("list-ilp",
88  "Bottom-up register pressure aware list scheduling "
89  "which tries to balance ILP and register pressure",
90  createILPListDAGScheduler);
91 
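// Note: each RegisterScheduler entry above maps a -pre-RA-sched=<name> value
// to a factory function defined near the end of this file. Roughly (a hedged
// sketch, not the exact factory bodies), a factory builds a priority queue
// driven by one of the sort heuristics defined below and hands it to the
// scheduler:
//
//   SchedulingPriorityQueue *PQ = /* e.g. a queue using bu_ls_rr_sort */;
//   auto *SD = new ScheduleDAGRRList(*IS->MF, /*needlatency=*/false, PQ,
//                                    OptLevel);
//   return SD;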
92 static cl::opt<bool> DisableSchedCycles(
93  "disable-sched-cycles", cl::Hidden, cl::init(false),
94  cl::desc("Disable cycle-level precision during preRA scheduling"));
95 
96 // Temporary sched=list-ilp flags until the heuristics are robust.
97 // Some options are also available under sched=list-hybrid.
98 static cl::opt<bool> DisableSchedRegPressure(
99  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100  cl::desc("Disable regpressure priority in sched=list-ilp"));
101 static cl::opt<bool> DisableSchedLiveUses(
102  "disable-sched-live-uses", cl::Hidden, cl::init(true),
103  cl::desc("Disable live use priority in sched=list-ilp"));
104 static cl::opt<bool> DisableSchedVRegCycle(
105  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106  cl::desc("Disable virtual register cycle interference checks"));
107 static cl::opt<bool> DisableSchedPhysRegJoin(
108  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109  cl::desc("Disable physreg def-use affinity"));
110 static cl::opt<bool> DisableSchedStalls(
111  "disable-sched-stalls", cl::Hidden, cl::init(true),
112  cl::desc("Disable no-stall priority in sched=list-ilp"));
113 static cl::opt<bool> DisableSchedCriticalPath(
114  "disable-sched-critical-path", cl::Hidden, cl::init(false),
115  cl::desc("Disable critical path priority in sched=list-ilp"));
116 static cl::opt<bool> DisableSchedHeight(
117  "disable-sched-height", cl::Hidden, cl::init(false),
118  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
119 static cl::opt<bool> Disable2AddrHack(
120  "disable-2addr-hack", cl::Hidden, cl::init(true),
121  cl::desc("Disable scheduler's two-address hack"));
122 
123 static cl::opt<int> MaxReorderWindow(
124  "max-sched-reorder", cl::Hidden, cl::init(6),
125  cl::desc("Number of instructions to allow ahead of the critical path "
126  "in sched=list-ilp"));
127 
128 static cl::opt<unsigned> AvgIPC(
129  "sched-avg-ipc", cl::Hidden, cl::init(1),
130  cl::desc("Average inst/cycle when no target itinerary exists."));
131 
132 namespace {
133 
134 //===----------------------------------------------------------------------===//
135 /// ScheduleDAGRRList - The actual register reduction list scheduler
136 /// implementation. This supports both top-down and bottom-up scheduling.
137 ///
138 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139 private:
140  /// NeedLatency - True if the scheduler will make use of latency information.
141  bool NeedLatency;
142 
143  /// AvailableQueue - The priority queue to use for the available SUnits.
144  SchedulingPriorityQueue *AvailableQueue;
145 
146  /// PendingQueue - This contains all of the instructions whose operands have
147  /// been issued, but their results are not ready yet (due to the latency of
148  /// the operation). Once the operands become available, the instruction is
149  /// added to the AvailableQueue.
150  std::vector<SUnit *> PendingQueue;
151 
152  /// HazardRec - The hazard recognizer to use.
153  ScheduleHazardRecognizer *HazardRec;
154 
155  /// CurCycle - The current scheduler state corresponds to this cycle.
156  unsigned CurCycle = 0;
157 
158  /// MinAvailableCycle - Cycle of the soonest available instruction.
159  unsigned MinAvailableCycle;
160 
161  /// IssueCount - Count instructions issued in this cycle
162  /// Currently valid only for bottom-up scheduling.
163  unsigned IssueCount;
164 
165  /// LiveRegDefs - A set of physical registers and their definitions that are
166  /// "live". These nodes must be scheduled before any other nodes that modify
167  /// the registers can be scheduled.
168  unsigned NumLiveRegs;
169  std::unique_ptr<SUnit*[]> LiveRegDefs;
170  std::unique_ptr<SUnit*[]> LiveRegGens;
171 
172  // Collect interferences between physical register use/defs.
173  // Each interference is an SUnit and set of physical registers.
174  SmallVector<SUnit*, 4> Interferences;
175 
176  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177 
178  LRegsMapT LRegsMap;
179 
180  /// Topo - A topological ordering for SUnits which permits fast IsReachable
181  /// and similar queries.
182  ScheduleDAGTopologicalSort Topo;
183 
184  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185  // DAG crawling.
186  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187 
188 public:
189  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190  SchedulingPriorityQueue *availqueue,
191  CodeGenOpt::Level OptLevel)
192  : ScheduleDAGSDNodes(mf),
193  NeedLatency(needlatency), AvailableQueue(availqueue),
194  Topo(SUnits, nullptr) {
195  const TargetSubtargetInfo &STI = mf.getSubtarget();
196  if (DisableSchedCycles || !NeedLatency)
197  HazardRec = new ScheduleHazardRecognizer();
198  else
199  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
200  }
201 
202  ~ScheduleDAGRRList() override {
203  delete HazardRec;
204  delete AvailableQueue;
205  }
206 
207  void Schedule() override;
208 
209  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
210 
211  /// IsReachable - Checks if SU is reachable from TargetSU.
212  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
213  return Topo.IsReachable(SU, TargetSU);
214  }
215 
216  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
217  /// create a cycle.
218  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
219  return Topo.WillCreateCycle(SU, TargetSU);
220  }
221 
222  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
223  /// This returns true if this is a new predecessor.
224  /// Does *NOT* update the topological ordering! It just queues an update.
225  void AddPredQueued(SUnit *SU, const SDep &D) {
226  Topo.AddPredQueued(SU, D.getSUnit());
227  SU->addPred(D);
228  }
229 
230  /// AddPred - adds a predecessor edge to SUnit SU.
231  /// This returns true if this is a new predecessor.
232  /// Updates the topological ordering if required.
233  void AddPred(SUnit *SU, const SDep &D) {
234  Topo.AddPred(SU, D.getSUnit());
235  SU->addPred(D);
236  }
237 
238  /// RemovePred - removes a predecessor edge from SUnit SU.
239  /// This returns true if an edge was removed.
240  /// Updates the topological ordering if required.
241  void RemovePred(SUnit *SU, const SDep &D) {
242  Topo.RemovePred(SU, D.getSUnit());
243  SU->removePred(D);
244  }
245 
246 private:
247  bool isReady(SUnit *SU) {
248  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
249  AvailableQueue->isReady(SU);
250  }
251 
252  void ReleasePred(SUnit *SU, const SDep *PredEdge);
253  void ReleasePredecessors(SUnit *SU);
254  void ReleasePending();
255  void AdvanceToCycle(unsigned NextCycle);
256  void AdvancePastStalls(SUnit *SU);
257  void EmitNode(SUnit *SU);
258  void ScheduleNodeBottomUp(SUnit*);
259  void CapturePred(SDep *PredEdge);
260  void UnscheduleNodeBottomUp(SUnit*);
261  void RestoreHazardCheckerBottomUp();
262  void BacktrackBottomUp(SUnit*, SUnit*);
263  SUnit *TryUnfoldSU(SUnit *);
264  SUnit *CopyAndMoveSuccessors(SUnit*);
265  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
266  const TargetRegisterClass*,
267  const TargetRegisterClass*,
268  SmallVectorImpl<SUnit*>&);
269  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
270 
271  void releaseInterferences(unsigned Reg = 0);
272 
273  SUnit *PickNodeToScheduleBottomUp();
274  void ListScheduleBottomUp();
275 
276  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
277  SUnit *CreateNewSUnit(SDNode *N) {
278  unsigned NumSUnits = SUnits.size();
279  SUnit *NewNode = newSUnit(N);
280  // Update the topological ordering.
281  if (NewNode->NodeNum >= NumSUnits)
282  Topo.MarkDirty();
283  return NewNode;
284  }
285 
286  /// CreateClone - Creates a new SUnit from an existing one.
287  SUnit *CreateClone(SUnit *N) {
288  unsigned NumSUnits = SUnits.size();
289  SUnit *NewNode = Clone(N);
290  // Update the topological ordering.
291  if (NewNode->NodeNum >= NumSUnits)
292  Topo.MarkDirty();
293  return NewNode;
294  }
295 
296  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
297  /// need actual latency information but the hybrid scheduler does.
298  bool forceUnitLatencies() const override {
299  return !NeedLatency;
300  }
301 };
302 
303 } // end anonymous namespace
304 
305 /// GetCostForDef - Looks up the register class and cost for a given definition.
306 /// Typically this just means looking up the representative register class,
307 /// but for untyped values (MVT::Untyped) it means inspecting the node's
308 /// opcode to determine what register class is being generated.
309 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
310  const TargetLowering *TLI,
311  const TargetInstrInfo *TII,
312  const TargetRegisterInfo *TRI,
313  unsigned &RegClass, unsigned &Cost,
314  const MachineFunction &MF) {
315  MVT VT = RegDefPos.GetValue();
316 
317  // Special handling for untyped values. These values can only come from
318  // the expansion of custom DAG-to-DAG patterns.
319  if (VT == MVT::Untyped) {
320  const SDNode *Node = RegDefPos.GetNode();
321 
322  // Special handling for CopyFromReg of untyped values.
323  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
324  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
325  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
326  RegClass = RC->getID();
327  Cost = 1;
328  return;
329  }
330 
331  unsigned Opcode = Node->getMachineOpcode();
332  if (Opcode == TargetOpcode::REG_SEQUENCE) {
333  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
334  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
335  RegClass = RC->getID();
336  Cost = 1;
337  return;
338  }
339 
340  unsigned Idx = RegDefPos.GetIdx();
341  const MCInstrDesc Desc = TII->get(Opcode);
342  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
343  RegClass = RC->getID();
344  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
345  // better way to determine it.
346  Cost = 1;
347  } else {
348  RegClass = TLI->getRepRegClassFor(VT)->getID();
349  Cost = TLI->getRepRegClassCostFor(VT);
350  }
351 }
352 
353 /// Schedule - Schedule the DAG using list scheduling.
354 void ScheduleDAGRRList::Schedule() {
355  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
356  << " '" << BB->getName() << "' **********\n");
357 
358  CurCycle = 0;
359  IssueCount = 0;
360  MinAvailableCycle =
361  DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
362  NumLiveRegs = 0;
363  // Allocate slots for each physical register, plus one for a special register
364  // to track the virtual resource of a calling sequence.
365  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
366  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
367  CallSeqEndForStart.clear();
368  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
369 
370  // Build the scheduling graph.
371  BuildSchedGraph(nullptr);
372 
373  LLVM_DEBUG(dump());
374  Topo.MarkDirty();
375 
376  AvailableQueue->initNodes(SUnits);
377 
378  HazardRec->Reset();
379 
380  // Execute the actual scheduling loop.
381  ListScheduleBottomUp();
382 
383  AvailableQueue->releaseState();
384 
385  LLVM_DEBUG({
386  dbgs() << "*** Final schedule ***\n";
387  dumpSchedule();
388  dbgs() << '\n';
389  });
390 }
391 
392 //===----------------------------------------------------------------------===//
393 // Bottom-Up Scheduling
394 //===----------------------------------------------------------------------===//
395 
396 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
397 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
398 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
399  SUnit *PredSU = PredEdge->getSUnit();
400 
401 #ifndef NDEBUG
402  if (PredSU->NumSuccsLeft == 0) {
403  dbgs() << "*** Scheduling failed! ***\n";
404  dumpNode(*PredSU);
405  dbgs() << " has been released too many times!\n";
406  llvm_unreachable(nullptr);
407  }
408 #endif
409  --PredSU->NumSuccsLeft;
410 
411  if (!forceUnitLatencies()) {
412  // Updating predecessor's height. This is now the cycle when the
413  // predecessor can be scheduled without causing a pipeline stall.
414  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
415  }
416 
417  // If all the node's successors are scheduled, this node is ready
418  // to be scheduled. Ignore the special EntrySU node.
419  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
420  PredSU->isAvailable = true;
421 
422  unsigned Height = PredSU->getHeight();
423  if (Height < MinAvailableCycle)
424  MinAvailableCycle = Height;
425 
426  if (isReady(PredSU)) {
427  AvailableQueue->push(PredSU);
428  }
429  // CapturePred and others may have left the node in the pending queue, avoid
430  // adding it twice.
431  else if (!PredSU->isPending) {
432  PredSU->isPending = true;
433  PendingQueue.push_back(PredSU);
434  }
435  }
436 }
437 
438 /// IsChainDependent - Test if Outer is reachable from Inner through
439 /// chain dependencies.
440 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
441  unsigned NestLevel,
442  const TargetInstrInfo *TII) {
443  SDNode *N = Outer;
444  while (true) {
445  if (N == Inner)
446  return true;
447  // For a TokenFactor, examine each operand. There may be multiple ways
448  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
449  // most nesting in order to ensure that we find the corresponding match.
450  if (N->getOpcode() == ISD::TokenFactor) {
451  for (const SDValue &Op : N->op_values())
452  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
453  return true;
454  return false;
455  }
456  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
457  if (N->isMachineOpcode()) {
458  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
459  ++NestLevel;
460  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
461  if (NestLevel == 0)
462  return false;
463  --NestLevel;
464  }
465  }
466  // Otherwise, find the chain and continue climbing.
467  for (const SDValue &Op : N->op_values())
468  if (Op.getValueType() == MVT::Other) {
469  N = Op.getNode();
470  goto found_chain_operand;
471  }
472  return false;
473  found_chain_operand:;
474  if (N->getOpcode() == ISD::EntryToken)
475  return false;
476  }
477 }
478 
479 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
480 /// the corresponding (lowered) CALLSEQ_BEGIN node.
481 ///
482 /// NestLevel and MaxNest are used in recursion to indicate the current level
483 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
484 /// level seen so far.
485 ///
486 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
487 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
488 static SDNode *
489 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
490  const TargetInstrInfo *TII) {
491  while (true) {
492  // For a TokenFactor, examine each operand. There may be multiple ways
493  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
494  // most nesting in order to ensure that we find the corresponding match.
495  if (N->getOpcode() == ISD::TokenFactor) {
496  SDNode *Best = nullptr;
497  unsigned BestMaxNest = MaxNest;
498  for (const SDValue &Op : N->op_values()) {
499  unsigned MyNestLevel = NestLevel;
500  unsigned MyMaxNest = MaxNest;
501  if (SDNode *New = FindCallSeqStart(Op.getNode(),
502  MyNestLevel, MyMaxNest, TII))
503  if (!Best || (MyMaxNest > BestMaxNest)) {
504  Best = New;
505  BestMaxNest = MyMaxNest;
506  }
507  }
508  assert(Best);
509  MaxNest = BestMaxNest;
510  return Best;
511  }
512  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
513  if (N->isMachineOpcode()) {
514  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
515  ++NestLevel;
516  MaxNest = std::max(MaxNest, NestLevel);
517  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
518  assert(NestLevel != 0);
519  --NestLevel;
520  if (NestLevel == 0)
521  return N;
522  }
523  }
524  // Otherwise, find the chain and continue climbing.
525  for (const SDValue &Op : N->op_values())
526  if (Op.getValueType() == MVT::Other) {
527  N = Op.getNode();
528  goto found_chain_operand;
529  }
530  return nullptr;
531  found_chain_operand:;
532  if (N->getOpcode() == ISD::EntryToken)
533  return nullptr;
534  }
535 }
536 
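// Illustration (hedged, informal): consider a nested call sequence, walked
// bottom-up along the chain from the outer CALLSEQ_END:
//
//   CALLSEQ_BEGIN (outer)   <-- walk stops here, NestLevel returns to 0
//     CALLSEQ_BEGIN (inner) <-- NestLevel 2 -> 1, not 0, keep climbing
//     CALLSEQ_END   (inner) <-- NestLevel 1 -> 2
//   CALLSEQ_END   (outer)   <-- start, NestLevel 0 -> 1
//
// Both IsChainDependent and FindCallSeqStart count CALLSEQ_END as +1 and
// CALLSEQ_BEGIN as -1, so the outer CALLSEQ_END is matched with the outer
// CALLSEQ_BEGIN rather than stopping at the inner one.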
537 /// Call ReleasePred for each predecessor, then update register live def/gen.
538 /// Always update LiveRegDefs for a register dependence even if the current SU
539 /// also defines the register. This effectively creates one large live range
540 /// across a sequence of two-address nodes. This is important because the
541 /// entire chain must be scheduled together. Example:
542 ///
543 /// flags = (3) add
544 /// flags = (2) addc flags
545 /// flags = (1) addc flags
546 ///
547 /// results in
548 ///
549 /// LiveRegDefs[flags] = 3
550 /// LiveRegGens[flags] = 1
551 ///
552 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
553 /// interference on flags.
554 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
555  // Bottom up: release predecessors
556  for (SDep &Pred : SU->Preds) {
557  ReleasePred(SU, &Pred);
558  if (Pred.isAssignedRegDep()) {
559  // This is a physical register dependency and it's impossible or
560  // expensive to copy the register. Make sure nothing that can
561  // clobber the register is scheduled between the predecessor and
562  // this node.
563  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
564  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
565  "interference on register dependence");
566  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
567  if (!LiveRegGens[Pred.getReg()]) {
568  ++NumLiveRegs;
569  LiveRegGens[Pred.getReg()] = SU;
570  }
571  }
572  }
573 
574  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
575  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
576  // these nodes, to prevent other calls from being interscheduled with them.
577  unsigned CallResource = TRI->getNumRegs();
578  if (!LiveRegDefs[CallResource])
579  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
580  if (Node->isMachineOpcode() &&
581  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
582  unsigned NestLevel = 0;
583  unsigned MaxNest = 0;
584  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
585  assert(N && "Must find call sequence start");
586 
587  SUnit *Def = &SUnits[N->getNodeId()];
588  CallSeqEndForStart[Def] = SU;
589 
590  ++NumLiveRegs;
591  LiveRegDefs[CallResource] = Def;
592  LiveRegGens[CallResource] = SU;
593  break;
594  }
595 }
596 
597 /// Check to see if any of the pending instructions are ready to issue. If
598 /// so, add them to the available queue.
599 void ScheduleDAGRRList::ReleasePending() {
600  if (DisableSchedCycles) {
601  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
602  return;
603  }
604 
605  // If the available queue is empty, it is safe to reset MinAvailableCycle.
606  if (AvailableQueue->empty())
607  MinAvailableCycle = std::numeric_limits<unsigned>::max();
608 
609  // Check to see if any of the pending instructions are ready to issue. If
610  // so, add them to the available queue.
611  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
612  unsigned ReadyCycle = PendingQueue[i]->getHeight();
613  if (ReadyCycle < MinAvailableCycle)
614  MinAvailableCycle = ReadyCycle;
615 
616  if (PendingQueue[i]->isAvailable) {
617  if (!isReady(PendingQueue[i]))
618  continue;
619  AvailableQueue->push(PendingQueue[i]);
620  }
621  PendingQueue[i]->isPending = false;
622  PendingQueue[i] = PendingQueue.back();
623  PendingQueue.pop_back();
624  --i; --e;
625  }
626 }
627 
628 /// Move the scheduler state forward to the specified cycle.
629 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
630  if (NextCycle <= CurCycle)
631  return;
632 
633  IssueCount = 0;
634  AvailableQueue->setCurCycle(NextCycle);
635  if (!HazardRec->isEnabled()) {
636  // Bypass lots of virtual calls in case of long latency.
637  CurCycle = NextCycle;
638  }
639  else {
640  for (; CurCycle != NextCycle; ++CurCycle) {
641  HazardRec->RecedeCycle();
642  }
643  }
644  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
645  // available Q to release pending nodes at least once before popping.
646  ReleasePending();
647 }
648 
649 /// Move the scheduler state forward until the specified node's dependents are
650 /// ready and can be scheduled with no resource conflicts.
651 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
652  if (DisableSchedCycles)
653  return;
654 
655  // FIXME: Nodes such as CopyFromReg probably should not advance the current
656  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
657  // has predecessors the cycle will be advanced when they are scheduled.
658  // But given the crude nature of modeling latency though such nodes, we
659  // currently need to treat these nodes like real instructions.
660  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
661 
662  unsigned ReadyCycle = SU->getHeight();
663 
664  // Bump CurCycle to account for latency. We assume the latency of other
665  // available instructions may be hidden by the stall (not a full pipe stall).
666  // This updates the hazard recognizer's cycle before reserving resources for
667  // this instruction.
668  AdvanceToCycle(ReadyCycle);
669 
670  // Calls are scheduled in their preceding cycle, so don't conflict with
671  // hazards from instructions after the call. EmitNode will reset the
672  // scoreboard state before emitting the call.
673  if (SU->isCall)
674  return;
675 
676  // FIXME: For resource conflicts in very long non-pipelined stages, we
677  // should probably skip ahead here to avoid useless scoreboard checks.
678  int Stalls = 0;
679  while (true) {
680  ScheduleHazardRecognizer::HazardType HT =
681  HazardRec->getHazardType(SU, -Stalls);
682 
683  if (HT == ScheduleHazardRecognizer::NoHazard)
684  break;
685 
686  ++Stalls;
687  }
688  AdvanceToCycle(CurCycle + Stalls);
689 }
690 
691 /// Record this SUnit in the HazardRecognizer.
692 /// Does not update CurCycle.
693 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
694  if (!HazardRec->isEnabled())
695  return;
696 
697  // Check for phys reg copy.
698  if (!SU->getNode())
699  return;
700 
701  switch (SU->getNode()->getOpcode()) {
702  default:
703  assert(SU->getNode()->isMachineOpcode() &&
704  "This target-independent node should not be scheduled.");
705  break;
706  case ISD::MERGE_VALUES:
707  case ISD::TokenFactor:
708  case ISD::LIFETIME_START:
709  case ISD::LIFETIME_END:
710  case ISD::CopyToReg:
711  case ISD::CopyFromReg:
712  case ISD::EH_LABEL:
713  // Noops don't affect the scoreboard state. Copies are likely to be
714  // removed.
715  return;
716  case ISD::INLINEASM:
717  case ISD::INLINEASM_BR:
718  // For inline asm, clear the pipeline state.
719  HazardRec->Reset();
720  return;
721  }
722  if (SU->isCall) {
723  // Calls are scheduled with their preceding instructions. For bottom-up
724  // scheduling, clear the pipeline state before emitting.
725  HazardRec->Reset();
726  }
727 
728  HazardRec->EmitInstruction(SU);
729 }
730 
731 static void resetVRegCycle(SUnit *SU);
732 
733 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
734 /// count of its predecessors. If a predecessor pending count is zero, add it to
735 /// the Available queue.
736 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
737  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
738  LLVM_DEBUG(dumpNode(*SU));
739 
740 #ifndef NDEBUG
741  if (CurCycle < SU->getHeight())
742  LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
743  << "] pipeline stall!\n");
744 #endif
745 
746  // FIXME: Do not modify node height. It may interfere with
747  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
748  // node its ready cycle can aid heuristics, and after scheduling it can
749  // indicate the scheduled cycle.
750  SU->setHeightToAtLeast(CurCycle);
751 
752  // Reserve resources for the scheduled instruction.
753  EmitNode(SU);
754 
755  Sequence.push_back(SU);
756 
757  AvailableQueue->scheduledNode(SU);
758 
759  // If HazardRec is disabled, and each inst counts as one cycle, then
760  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
761  // PendingQueue for schedulers that implement HasReadyFilter.
762  if (!HazardRec->isEnabled() && AvgIPC < 2)
763  AdvanceToCycle(CurCycle + 1);
764 
765  // Update liveness of predecessors before successors to avoid treating a
766  // two-address node as a live range def.
767  ReleasePredecessors(SU);
768 
769  // Release all the implicit physical register defs that are live.
770  for (SDep &Succ : SU->Succs) {
771  // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
772  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
773  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
774  --NumLiveRegs;
775  LiveRegDefs[Succ.getReg()] = nullptr;
776  LiveRegGens[Succ.getReg()] = nullptr;
777  releaseInterferences(Succ.getReg());
778  }
779  }
780  // Release the special call resource dependence, if this is the beginning
781  // of a call.
782  unsigned CallResource = TRI->getNumRegs();
783  if (LiveRegDefs[CallResource] == SU)
784  for (const SDNode *SUNode = SU->getNode(); SUNode;
785  SUNode = SUNode->getGluedNode()) {
786  if (SUNode->isMachineOpcode() &&
787  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
788  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
789  --NumLiveRegs;
790  LiveRegDefs[CallResource] = nullptr;
791  LiveRegGens[CallResource] = nullptr;
792  releaseInterferences(CallResource);
793  }
794  }
795 
796  resetVRegCycle(SU);
797 
798  SU->isScheduled = true;
799 
800  // Conditions under which the scheduler should eagerly advance the cycle:
801  // (1) No available instructions
802  // (2) All pipelines full, so available instructions must have hazards.
803  //
804  // If HazardRec is disabled, the cycle was pre-advanced before calling
805  // ReleasePredecessors. In that case, IssueCount should remain 0.
806  //
807  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
808  if (HazardRec->isEnabled() || AvgIPC > 1) {
809  if (SU->getNode() && SU->getNode()->isMachineOpcode())
810  ++IssueCount;
811  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
812  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
813  AdvanceToCycle(CurCycle + 1);
814  }
815 }
816 
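// Worked note (informal): with the hazard recognizer disabled and the default
// -sched-avg-ipc=1, ScheduleNodeBottomUp advances CurCycle once per scheduled
// node via the AvgIPC < 2 branch above. With, say, -sched-avg-ipc=2 that early
// advance is skipped; instead IssueCount counts machine instructions issued in
// the current cycle and CurCycle is only bumped once IssueCount reaches AvgIPC,
// modeling two instructions issued per cycle.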
817 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
818 /// unscheduled, increase the succ left count of its predecessors. Remove
819 /// them from AvailableQueue if necessary.
820 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
821  SUnit *PredSU = PredEdge->getSUnit();
822  if (PredSU->isAvailable) {
823  PredSU->isAvailable = false;
824  if (!PredSU->isPending)
825  AvailableQueue->remove(PredSU);
826  }
827 
828  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
829  "NumSuccsLeft will overflow!");
830  ++PredSU->NumSuccsLeft;
831 }
832 
833 /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
834 /// own state and its predecessors' states to reflect the change.
835 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
836  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
837  LLVM_DEBUG(dumpNode(*SU));
838 
839  for (SDep &Pred : SU->Preds) {
840  CapturePred(&Pred);
841  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
842  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
843  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
844  "Physical register dependency violated?");
845  --NumLiveRegs;
846  LiveRegDefs[Pred.getReg()] = nullptr;
847  LiveRegGens[Pred.getReg()] = nullptr;
848  releaseInterferences(Pred.getReg());
849  }
850  }
851 
852  // Reclaim the special call resource dependence, if this is the beginning
853  // of a call.
854  unsigned CallResource = TRI->getNumRegs();
855  for (const SDNode *SUNode = SU->getNode(); SUNode;
856  SUNode = SUNode->getGluedNode()) {
857  if (SUNode->isMachineOpcode() &&
858  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
859  SUnit *SeqEnd = CallSeqEndForStart[SU];
860  assert(SeqEnd && "Call sequence start/end must be known");
861  assert(!LiveRegDefs[CallResource]);
862  assert(!LiveRegGens[CallResource]);
863  ++NumLiveRegs;
864  LiveRegDefs[CallResource] = SU;
865  LiveRegGens[CallResource] = SeqEnd;
866  }
867  }
868 
869  // Release the special call resource dependence, if this is the end
870  // of a call.
871  if (LiveRegGens[CallResource] == SU)
872  for (const SDNode *SUNode = SU->getNode(); SUNode;
873  SUNode = SUNode->getGluedNode()) {
874  if (SUNode->isMachineOpcode() &&
875  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
876  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
877  assert(LiveRegDefs[CallResource]);
878  assert(LiveRegGens[CallResource]);
879  --NumLiveRegs;
880  LiveRegDefs[CallResource] = nullptr;
881  LiveRegGens[CallResource] = nullptr;
882  releaseInterferences(CallResource);
883  }
884  }
885 
886  for (auto &Succ : SU->Succs) {
887  if (Succ.isAssignedRegDep()) {
888  auto Reg = Succ.getReg();
889  if (!LiveRegDefs[Reg])
890  ++NumLiveRegs;
891  // This becomes the nearest def. Note that an earlier def may still be
892  // pending if this is a two-address node.
893  LiveRegDefs[Reg] = SU;
894 
895  // Update LiveRegGens only if it was empty before this unscheduling.
896  // This avoids incorrectly overwriting an entry set in a previous run.
897  if (!LiveRegGens[Reg]) {
898  // Find the successor with the lowest height.
899  LiveRegGens[Reg] = Succ.getSUnit();
900  for (auto &Succ2 : SU->Succs) {
901  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
902  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
903  LiveRegGens[Reg] = Succ2.getSUnit();
904  }
905  }
906  }
907  }
908  if (SU->getHeight() < MinAvailableCycle)
909  MinAvailableCycle = SU->getHeight();
910 
911  SU->setHeightDirty();
912  SU->isScheduled = false;
913  SU->isAvailable = true;
914  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
915  // Don't make available until backtracking is complete.
916  SU->isPending = true;
917  PendingQueue.push_back(SU);
918  }
919  else {
920  AvailableQueue->push(SU);
921  }
922  AvailableQueue->unscheduledNode(SU);
923 }
924 
925 /// After backtracking, the hazard checker needs to be restored to a state
926 /// corresponding to the current cycle.
927 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
928  HazardRec->Reset();
929 
930  unsigned LookAhead = std::min((unsigned)Sequence.size(),
931  HazardRec->getMaxLookAhead());
932  if (LookAhead == 0)
933  return;
934 
935  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
936  unsigned HazardCycle = (*I)->getHeight();
937  for (auto E = Sequence.end(); I != E; ++I) {
938  SUnit *SU = *I;
939  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
940  HazardRec->RecedeCycle();
941  }
942  EmitNode(SU);
943  }
944 }
945 
946 /// BacktrackBottomUp - Backtrack scheduling to an earlier cycle, unscheduling
947 /// nodes up to and including BtSU, in order to schedule the node SU.
948 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
949  SUnit *OldSU = Sequence.back();
950  while (true) {
951  Sequence.pop_back();
952  // FIXME: use ready cycle instead of height
953  CurCycle = OldSU->getHeight();
954  UnscheduleNodeBottomUp(OldSU);
955  AvailableQueue->setCurCycle(CurCycle);
956  if (OldSU == BtSU)
957  break;
958  OldSU = Sequence.back();
959  }
960 
961  assert(!SU->isSucc(OldSU) && "Something is wrong!");
962 
963  RestoreHazardCheckerBottomUp();
964 
965  ReleasePending();
966 
967  ++NumBacktracks;
968 }
969 
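// Note (informal): backtracking is invoked from PickNodeToScheduleBottomUp()
// when every available node would clobber a live physical register.
// Unscheduling back to BtSU, the node that generates the interfering value,
// frees that register so the delayed node can issue first; the unwound nodes
// are then rescheduled on later iterations.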
970 static bool isOperandOf(const SUnit *SU, SDNode *N) {
971  for (const SDNode *SUNode = SU->getNode(); SUNode;
972  SUNode = SUNode->getGluedNode()) {
973  if (SUNode->isOperandOf(N))
974  return true;
975  }
976  return false;
977 }
978 
979 /// TryUnfoldSU - Attempt to unfold a load-folding node into a separate load and compute node.
980 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
981  SDNode *N = SU->getNode();
982  // Use while over if to ease fall through.
983  SmallVector<SDNode *, 2> NewNodes;
984  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
985  return nullptr;
986 
987  // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
988  // which can't be handled here, so quit.
989  if (NewNodes.size() == 3)
990  return nullptr;
991 
992  assert(NewNodes.size() == 2 && "Expected a load folding node!");
993 
994  N = NewNodes[1];
995  SDNode *LoadNode = NewNodes[0];
996  unsigned NumVals = N->getNumValues();
997  unsigned OldNumVals = SU->getNode()->getNumValues();
998 
999  // LoadNode may already exist. This can happen when there is another
1000  // load from the same location producing the same type of value
1001  // but with different alignment or volatility.
1002  bool isNewLoad = true;
1003  SUnit *LoadSU;
1004  if (LoadNode->getNodeId() != -1) {
1005  LoadSU = &SUnits[LoadNode->getNodeId()];
1006  // If LoadSU has already been scheduled, we should clone it but
1007  // this would negate the benefit to unfolding so just return SU.
1008  if (LoadSU->isScheduled)
1009  return SU;
1010  isNewLoad = false;
1011  } else {
1012  LoadSU = CreateNewSUnit(LoadNode);
1013  LoadNode->setNodeId(LoadSU->NodeNum);
1014 
1015  InitNumRegDefsLeft(LoadSU);
1016  computeLatency(LoadSU);
1017  }
1018 
1019  bool isNewN = true;
1020  SUnit *NewSU;
1021  // This can only happen when isNewLoad is false.
1022  if (N->getNodeId() != -1) {
1023  NewSU = &SUnits[N->getNodeId()];
1024  // If NewSU has already been scheduled, we need to clone it, but this
1025  // negates the benefit to unfolding so just return SU.
1026  if (NewSU->isScheduled) {
1027  return SU;
1028  }
1029  isNewN = false;
1030  } else {
1031  NewSU = CreateNewSUnit(N);
1032  N->setNodeId(NewSU->NodeNum);
1033 
1034  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1035  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1036  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1037  NewSU->isTwoAddress = true;
1038  break;
1039  }
1040  }
1041  if (MCID.isCommutable())
1042  NewSU->isCommutable = true;
1043 
1044  InitNumRegDefsLeft(NewSU);
1045  computeLatency(NewSU);
1046  }
1047 
1048  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1049 
1050  // Now that we are committed to unfolding, replace the DAG uses.
1051  for (unsigned i = 0; i != NumVals; ++i)
1052  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1053  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1054  SDValue(LoadNode, 1));
1055 
1056  // Record all the edges to and from the old SU, by category.
1057  SmallVector<SDep, 4> ChainPreds;
1058  SmallVector<SDep, 4> ChainSuccs;
1059  SmallVector<SDep, 4> LoadPreds;
1060  SmallVector<SDep, 4> NodePreds;
1061  SmallVector<SDep, 4> NodeSuccs;
1062  for (SDep &Pred : SU->Preds) {
1063  if (Pred.isCtrl())
1064  ChainPreds.push_back(Pred);
1065  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1066  LoadPreds.push_back(Pred);
1067  else
1068  NodePreds.push_back(Pred);
1069  }
1070  for (SDep &Succ : SU->Succs) {
1071  if (Succ.isCtrl())
1072  ChainSuccs.push_back(Succ);
1073  else
1074  NodeSuccs.push_back(Succ);
1075  }
1076 
1077  // Now assign edges to the newly-created nodes.
1078  for (const SDep &Pred : ChainPreds) {
1079  RemovePred(SU, Pred);
1080  if (isNewLoad)
1081  AddPredQueued(LoadSU, Pred);
1082  }
1083  for (const SDep &Pred : LoadPreds) {
1084  RemovePred(SU, Pred);
1085  if (isNewLoad)
1086  AddPredQueued(LoadSU, Pred);
1087  }
1088  for (const SDep &Pred : NodePreds) {
1089  RemovePred(SU, Pred);
1090  AddPredQueued(NewSU, Pred);
1091  }
1092  for (SDep D : NodeSuccs) {
1093  SUnit *SuccDep = D.getSUnit();
1094  D.setSUnit(SU);
1095  RemovePred(SuccDep, D);
1096  D.setSUnit(NewSU);
1097  AddPredQueued(SuccDep, D);
1098  // Balance register pressure.
1099  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1100  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1101  --NewSU->NumRegDefsLeft;
1102  }
1103  for (SDep D : ChainSuccs) {
1104  SUnit *SuccDep = D.getSUnit();
1105  D.setSUnit(SU);
1106  RemovePred(SuccDep, D);
1107  if (isNewLoad) {
1108  D.setSUnit(LoadSU);
1109  AddPredQueued(SuccDep, D);
1110  }
1111  }
1112 
1113  // Add a data dependency to reflect that NewSU reads the value defined
1114  // by LoadSU.
1115  SDep D(LoadSU, SDep::Data, 0);
1116  D.setLatency(LoadSU->Latency);
1117  AddPredQueued(NewSU, D);
1118 
1119  if (isNewLoad)
1120  AvailableQueue->addNode(LoadSU);
1121  if (isNewN)
1122  AvailableQueue->addNode(NewSU);
1123 
1124  ++NumUnfolds;
1125 
1126  if (NewSU->NumSuccsLeft == 0)
1127  NewSU->isAvailable = true;
1128 
1129  return NewSU;
1130 }
1131 
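// Illustration (hedged): on X86, unfolding lets the scheduler split a
// load-folding instruction such as ADD32rm into a separate load plus the
// register form (ADD32rr), so the load can be scheduled independently when
// that helps break a physical register interference.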
1132 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1133 /// successors to the newly created node.
1134 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1135  SDNode *N = SU->getNode();
1136  if (!N)
1137  return nullptr;
1138 
1139  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1140  LLVM_DEBUG(dumpNode(*SU));
1141 
1142  if (N->getGluedNode() &&
1143  !TII->canCopyGluedNodeDuringSchedule(N)) {
1144  LLVM_DEBUG(
1145  dbgs()
1146  << "Giving up because it has incoming glue and the target does not "
1147  "want to copy it\n");
1148  return nullptr;
1149  }
1150 
1151  SUnit *NewSU;
1152  bool TryUnfold = false;
1153  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1154  MVT VT = N->getSimpleValueType(i);
1155  if (VT == MVT::Glue) {
1156  LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1157  return nullptr;
1158  } else if (VT == MVT::Other)
1159  TryUnfold = true;
1160  }
1161  for (const SDValue &Op : N->op_values()) {
1162  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1163  if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1164  LLVM_DEBUG(
1165  dbgs() << "Giving up because one of the operands is glue and "
1166  "the target does not want to copy it\n");
1167  return nullptr;
1168  }
1169  }
1170 
1171  // If possible, unfold the instruction.
1172  if (TryUnfold) {
1173  SUnit *UnfoldSU = TryUnfoldSU(SU);
1174  if (!UnfoldSU)
1175  return nullptr;
1176  SU = UnfoldSU;
1177  N = SU->getNode();
1178  // If this can be scheduled, don't bother duplicating; just return.
1179  if (SU->NumSuccsLeft == 0)
1180  return SU;
1181  }
1182 
1183  LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1184  NewSU = CreateClone(SU);
1185 
1186  // New SUnit has the exact same predecessors.
1187  for (SDep &Pred : SU->Preds)
1188  if (!Pred.isArtificial())
1189  AddPredQueued(NewSU, Pred);
1190 
1191  // Make sure the clone comes after the original. (InstrEmitter assumes
1192  // this ordering.)
1193  AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1194 
1195  // Only copy scheduled successors. Cut them from old node's successor
1196  // list and move them over.
1197  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1198  for (SDep &Succ : SU->Succs) {
1199  if (Succ.isArtificial())
1200  continue;
1201  SUnit *SuccSU = Succ.getSUnit();
1202  if (SuccSU->isScheduled) {
1203  SDep D = Succ;
1204  D.setSUnit(NewSU);
1205  AddPredQueued(SuccSU, D);
1206  D.setSUnit(SU);
1207  DelDeps.push_back(std::make_pair(SuccSU, D));
1208  }
1209  }
1210  for (auto &DelDep : DelDeps)
1211  RemovePred(DelDep.first, DelDep.second);
1212 
1213  AvailableQueue->updateNode(SU);
1214  AvailableQueue->addNode(NewSU);
1215 
1216  ++NumDups;
1217  return NewSU;
1218 }
1219 
1220 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1221 /// scheduled successors of the given SUnit to the last copy.
1222 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1223  const TargetRegisterClass *DestRC,
1224  const TargetRegisterClass *SrcRC,
1225  SmallVectorImpl<SUnit*> &Copies) {
1226  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1227  CopyFromSU->CopySrcRC = SrcRC;
1228  CopyFromSU->CopyDstRC = DestRC;
1229 
1230  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1231  CopyToSU->CopySrcRC = DestRC;
1232  CopyToSU->CopyDstRC = SrcRC;
1233 
1234  // Only copy scheduled successors. Cut them from old node's successor
1235  // list and move them over.
1236  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1237  for (SDep &Succ : SU->Succs) {
1238  if (Succ.isArtificial())
1239  continue;
1240  SUnit *SuccSU = Succ.getSUnit();
1241  if (SuccSU->isScheduled) {
1242  SDep D = Succ;
1243  D.setSUnit(CopyToSU);
1244  AddPredQueued(SuccSU, D);
1245  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1246  }
1247  else {
1248  // Avoid scheduling the def-side copy before other successors. Otherwise
1249  // we could introduce another physreg interference on the copy and
1250  // continue inserting copies indefinitely.
1251  AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1252  }
1253  }
1254  for (auto &DelDep : DelDeps)
1255  RemovePred(DelDep.first, DelDep.second);
1256 
1257  SDep FromDep(SU, SDep::Data, Reg);
1258  FromDep.setLatency(SU->Latency);
1259  AddPredQueued(CopyFromSU, FromDep);
1260  SDep ToDep(CopyFromSU, SDep::Data, 0);
1261  ToDep.setLatency(CopyFromSU->Latency);
1262  AddPredQueued(CopyToSU, ToDep);
1263 
1264  AvailableQueue->updateNode(SU);
1265  AvailableQueue->addNode(CopyFromSU);
1266  AvailableQueue->addNode(CopyToSU);
1267  Copies.push_back(CopyFromSU);
1268  Copies.push_back(CopyToSU);
1269 
1270  ++NumPRCopies;
1271 }
1272 
1273 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1274 /// definition of the specified node.
1275 /// FIXME: Move to SelectionDAG?
1276 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1277  const TargetInstrInfo *TII) {
1278  unsigned NumRes;
1279  if (N->getOpcode() == ISD::CopyFromReg) {
1280  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1281  NumRes = 1;
1282  } else {
1283  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1284  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1285  NumRes = MCID.getNumDefs();
1286  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1287  if (Reg == *ImpDef)
1288  break;
1289  ++NumRes;
1290  }
1291  }
1292  return N->getSimpleValueType(NumRes);
1293 }
1294 
1295 /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
1296 /// specified register def of the specified SUnit clobbers any "live" registers.
1297 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1298  SUnit **LiveRegDefs,
1299  SmallSet<unsigned, 4> &RegAdded,
1300  SmallVectorImpl<unsigned> &LRegs,
1301  const TargetRegisterInfo *TRI) {
1302  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1303 
1304  // Check if Reg is live.
1305  if (!LiveRegDefs[*AliasI]) continue;
1306 
1307  // Allow multiple uses of the same def.
1308  if (LiveRegDefs[*AliasI] == SU) continue;
1309 
1310  // Add Reg to the set of interfering live regs.
1311  if (RegAdded.insert(*AliasI).second) {
1312  LRegs.push_back(*AliasI);
1313  }
1314  }
1315 }
1316 
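// Illustration (hedged): because the loop above walks MCRegAliasIterator with
// IncludeSelf=true, a def interferes with live defs of any overlapping
// register as well; on X86, for example, a def of EAX is reported as
// interfering while AX or AL is held live by another SUnit.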
1317 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1318 /// by RegMask, and add them to LRegs.
1319 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1320  ArrayRef<SUnit*> LiveRegDefs,
1321  SmallSet<unsigned, 4> &RegAdded,
1322  SmallVectorImpl<unsigned> &LRegs) {
1323  // Look at all live registers. Skip Reg0 and the special CallResource.
1324  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1325  if (!LiveRegDefs[i]) continue;
1326  if (LiveRegDefs[i] == SU) continue;
1327  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1328  if (RegAdded.insert(i).second)
1329  LRegs.push_back(i);
1330  }
1331 }
1332 
1333 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1334 static const uint32_t *getNodeRegMask(const SDNode *N) {
1335  for (const SDValue &Op : N->op_values())
1336  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1337  return RegOp->getRegMask();
1338  return nullptr;
1339 }
1340 
1341 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1342 /// scheduling of the given node to satisfy live physical register dependencies.
1343 /// If the specific node is the last one that's available to schedule, do
1344 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1345 bool ScheduleDAGRRList::
1346 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1347  if (NumLiveRegs == 0)
1348  return false;
1349 
1350  SmallSet<unsigned, 4> RegAdded;
1351  // If this node would clobber any "live" register, then it's not ready.
1352  //
1353  // If SU is the currently live definition of the same register that it uses,
1354  // then we are free to schedule it.
1355  for (SDep &Pred : SU->Preds) {
1356  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1357  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1358  RegAdded, LRegs, TRI);
1359  }
1360 
1361  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1362  if (Node->getOpcode() == ISD::INLINEASM ||
1363  Node->getOpcode() == ISD::INLINEASM_BR) {
1364  // Inline asm can clobber physical defs.
1365  unsigned NumOps = Node->getNumOperands();
1366  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1367  --NumOps; // Ignore the glue operand.
1368 
1369  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1370  unsigned Flags =
1371  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1372  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1373 
1374  ++i; // Skip the ID value.
1375  if (InlineAsm::isRegDefKind(Flags) ||
1376  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1377  InlineAsm::isClobberKind(Flags)) {
1378  // Check for def of register or earlyclobber register.
1379  for (; NumVals; --NumVals, ++i) {
1380  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1381  if (Register::isPhysicalRegister(Reg))
1382  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1383  }
1384  } else
1385  i += NumVals;
1386  }
1387  continue;
1388  }
1389 
1390  if (!Node->isMachineOpcode())
1391  continue;
1392  // If we're in the middle of scheduling a call, don't begin scheduling
1393  // another call. Also, don't allow any physical registers to be live across
1394  // the call.
1395  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1396  // Check the special calling-sequence resource.
1397  unsigned CallResource = TRI->getNumRegs();
1398  if (LiveRegDefs[CallResource]) {
1399  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1400  while (SDNode *Glued = Gen->getGluedNode())
1401  Gen = Glued;
1402  if (!IsChainDependent(Gen, Node, 0, TII) &&
1403  RegAdded.insert(CallResource).second)
1404  LRegs.push_back(CallResource);
1405  }
1406  }
1407  if (const uint32_t *RegMask = getNodeRegMask(Node))
1408  CheckForLiveRegDefMasked(SU, RegMask,
1409  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1410  RegAdded, LRegs);
1411 
1412  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1413  if (MCID.hasOptionalDef()) {
1414  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1415  // This operand can be either a def of CPSR, if the S bit is set; or a use
1416  // of %noreg. When the OptionalDef is set to a valid register, we need to
1417  // handle it in the same way as an ImplicitDef.
1418  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1419  if (MCID.OpInfo[i].isOptionalDef()) {
1420  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1421  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1422  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1423  }
1424  }
1425  if (!MCID.ImplicitDefs)
1426  continue;
1427  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1428  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1429  }
1430 
1431  return !LRegs.empty();
1432 }
1433 
1434 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1435  // Add the nodes that aren't ready back onto the available list.
1436  for (unsigned i = Interferences.size(); i > 0; --i) {
1437  SUnit *SU = Interferences[i-1];
1438  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1439  if (Reg) {
1440  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1441  if (!is_contained(LRegs, Reg))
1442  continue;
1443  }
1444  SU->isPending = false;
1445  // The interfering node may no longer be available due to backtracking.
1446  // Furthermore, it may have been made available again, in which case it is
1447  // now already in the AvailableQueue.
1448  if (SU->isAvailable && !SU->NodeQueueId) {
1449  LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1450  AvailableQueue->push(SU);
1451  }
1452  if (i < Interferences.size())
1453  Interferences[i-1] = Interferences.back();
1454  Interferences.pop_back();
1455  LRegsMap.erase(LRegsPos);
1456  }
1457 }
1458 
1459 /// Return a node that can be scheduled in this cycle. Requirements:
1460 /// (1) Ready: latency has been satisfied
1461 /// (2) No Hazards: resources are available
1462 /// (3) No Interferences: may unschedule to break register interferences.
1463 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1464  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1465  auto FindAvailableNode = [&]() {
1466  while (CurSU) {
1467  SmallVector<unsigned, 4> LRegs;
1468  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1469  break;
1470  LLVM_DEBUG(dbgs() << " Interfering reg ";
1471  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1472  else dbgs() << printReg(LRegs[0], TRI);
1473  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1474  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1475  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1476  if (LRegsPair.second) {
1477  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1478  Interferences.push_back(CurSU);
1479  }
1480  else {
1481  assert(CurSU->isPending && "Interferences are pending");
1482  // Update the interference with current live regs.
1483  LRegsPair.first->second = LRegs;
1484  }
1485  CurSU = AvailableQueue->pop();
1486  }
1487  };
1488  FindAvailableNode();
1489  if (CurSU)
1490  return CurSU;
1491 
1492  // We query the topological order in the loop body, so make sure outstanding
1493  // updates are applied before entering it (we only enter the loop if there
1494  // are some interferences). If we make changes to the ordering, we exit
1495  // the loop.
1496 
1497  // All candidates are delayed due to live physical reg dependencies.
1498  // Try backtracking, code duplication, or inserting cross class copies
1499  // to resolve it.
1500  for (SUnit *TrySU : Interferences) {
1501  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1502 
1503  // Try unscheduling up to the point where it's safe to schedule
1504  // this node.
1505  SUnit *BtSU = nullptr;
1506  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1507  for (unsigned Reg : LRegs) {
1508  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1509  BtSU = LiveRegGens[Reg];
1510  LiveCycle = BtSU->getHeight();
1511  }
1512  }
1513  if (!WillCreateCycle(TrySU, BtSU)) {
1514  // BacktrackBottomUp mutates Interferences!
1515  BacktrackBottomUp(TrySU, BtSU);
1516 
1517  // Force the current node to be scheduled before the node that
1518  // requires the physical reg dep.
1519  if (BtSU->isAvailable) {
1520  BtSU->isAvailable = false;
1521  if (!BtSU->isPending)
1522  AvailableQueue->remove(BtSU);
1523  }
1524  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1525  << ") to SU(" << TrySU->NodeNum << ")\n");
1526  AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1527 
1528  // If one or more successors has been unscheduled, then the current
1529  // node is no longer available.
1530  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1531  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1532  CurSU = AvailableQueue->pop();
1533  } else {
1534  LLVM_DEBUG(dbgs() << "TrySU available\n");
1535  // Available and in AvailableQueue
1536  AvailableQueue->remove(TrySU);
1537  CurSU = TrySU;
1538  }
1539  FindAvailableNode();
1540  // Interferences has been mutated. We must break.
1541  break;
1542  }
1543  }
1544 
1545  if (!CurSU) {
1546  // Can't backtrack. If it's too expensive to copy the value, then try to
1547  // duplicate the nodes that produce these "too expensive to copy"
1548  // values to break the dependency. In case even that doesn't work,
1549  // insert cross class copies.
1550  // If it's not too expensive, i.e. cost != -1, issue copies.
1551  SUnit *TrySU = Interferences[0];
1552  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1553  assert(LRegs.size() == 1 && "Can't handle this yet!");
1554  unsigned Reg = LRegs[0];
1555  SUnit *LRDef = LiveRegDefs[Reg];
1556  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1557  const TargetRegisterClass *RC =
1558  TRI->getMinimalPhysRegClass(Reg, VT);
1559  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1560 
1561  // If the cross copy register class is the same as RC, then it must be
1562  // possible to copy the value directly. Do not try to duplicate the def.
1563  // If the cross copy register class is not the same as RC, then it's possible
1564  // to copy the value but it requires cross register class copies and it is
1565  // expensive.
1566  // If cross copy register class is null, then it's not possible to copy
1567  // the value at all.
1568  SUnit *NewDef = nullptr;
1569  if (DestRC != RC) {
1570  NewDef = CopyAndMoveSuccessors(LRDef);
1571  if (!DestRC && !NewDef)
1572  report_fatal_error("Can't handle live physical register dependency!");
1573  }
1574  if (!NewDef) {
1575  // Issue copies, these can be expensive cross register class copies.
1576  SmallVector<SUnit*, 2> Copies;
1577  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1578  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1579  << " to SU #" << Copies.front()->NodeNum << "\n");
1580  AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1581  NewDef = Copies.back();
1582  }
1583 
1584  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1585  << " to SU #" << TrySU->NodeNum << "\n");
1586  LiveRegDefs[Reg] = NewDef;
1587  AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1588  TrySU->isAvailable = false;
1589  CurSU = NewDef;
1590  }
1591  assert(CurSU && "Unable to resolve live physical register dependencies!");
1592  return CurSU;
1593 }
1594 
1595 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1596 /// schedulers.
1597 void ScheduleDAGRRList::ListScheduleBottomUp() {
1598  // Release any predecessors of the special Exit node.
1599  ReleasePredecessors(&ExitSU);
1600 
1601  // Add root to Available queue.
1602  if (!SUnits.empty()) {
1603  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1604  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1605  RootSU->isAvailable = true;
1606  AvailableQueue->push(RootSU);
1607  }
1608 
1609  // While Available queue is not empty, grab the node with the highest
1610  // priority. If it is not ready put it back. Schedule the node.
1611  Sequence.reserve(SUnits.size());
1612  while (!AvailableQueue->empty() || !Interferences.empty()) {
1613  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1614  AvailableQueue->dump(this));
1615 
1616  // Pick the best node to schedule taking all constraints into
1617  // consideration.
1618  SUnit *SU = PickNodeToScheduleBottomUp();
1619 
1620  AdvancePastStalls(SU);
1621 
1622  ScheduleNodeBottomUp(SU);
1623 
1624  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1625  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1626  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1627  "MinAvailableCycle uninitialized");
1628  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1629  }
1630  }
1631 
1632  // Reverse the order if it is bottom up.
1633  std::reverse(Sequence.begin(), Sequence.end());
1634 
1635 #ifndef NDEBUG
1636  VerifyScheduledSequence(/*isBottomUp=*/true);
1637 #endif
1638 }
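
The loop above can be hard to see past the cycle advancing and interference handling. Below is a minimal standalone sketch of the same overall shape, using a plain adjacency list and a trivial pick rule rather than the SUnit machinery or AvailableQueue: nodes become available once all of their consumers are emitted, one available node is emitted per iteration, and the sequence is reversed at the end because scheduling ran bottom-up.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Preds[n] = nodes whose results n consumes. Node 2 uses nodes 0 and 1.
  std::vector<std::vector<unsigned>> Preds = {{}, {}, {0, 1}};
  std::vector<unsigned> SuccsLeft = {1, 1, 0};  // unscheduled consumers per node

  std::vector<unsigned> Available = {2};        // bottom-up: start at the root
  std::vector<unsigned> Sequence;
  while (!Available.empty()) {
    unsigned SU = Available.back();             // trivial "priority": LIFO pick
    Available.pop_back();
    Sequence.push_back(SU);
    for (unsigned P : Preds[SU])                // release predecessors
      if (--SuccsLeft[P] == 0)
        Available.push_back(P);
  }
  std::reverse(Sequence.begin(), Sequence.end());  // emit in top-down order
  assert(Sequence.back() == 2);                 // the root ends up last
  return 0;
}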
1639 
1640 namespace {
1641 
1642 class RegReductionPQBase;
1643 
1644 struct queue_sort {
1645  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1646 };
1647 
1648 #ifndef NDEBUG
1649 template<class SF>
1650 struct reverse_sort : public queue_sort {
1651  SF &SortFunc;
1652 
1653  reverse_sort(SF &sf) : SortFunc(sf) {}
1654 
1655  bool operator()(SUnit* left, SUnit* right) const {
1656  // reverse left/right rather than simply !SortFunc(left, right)
1657  // to expose different paths in the comparison logic.
1658  return SortFunc(right, left);
1659  }
1660 };
1661 #endif // NDEBUG
1662 
1663 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1664 // reduction scheduler.
1665 struct bu_ls_rr_sort : public queue_sort {
1666  enum {
1667  IsBottomUp = true,
1668  HasReadyFilter = false
1669  };
1670 
1671  RegReductionPQBase *SPQ;
1672 
1673  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1674 
1675  bool operator()(SUnit* left, SUnit* right) const;
1676 };
1677 
1678 // src_ls_rr_sort - Priority function for source order scheduler.
1679 struct src_ls_rr_sort : public queue_sort {
1680  enum {
1681  IsBottomUp = true,
1682  HasReadyFilter = false
1683  };
1684 
1685  RegReductionPQBase *SPQ;
1686 
1687  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1688 
1689  bool operator()(SUnit* left, SUnit* right) const;
1690 };
1691 
1692 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1693 struct hybrid_ls_rr_sort : public queue_sort {
1694  enum {
1695  IsBottomUp = true,
1696  HasReadyFilter = false
1697  };
1698 
1699  RegReductionPQBase *SPQ;
1700 
1701  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1702 
1703  bool isReady(SUnit *SU, unsigned CurCycle) const;
1704 
1705  bool operator()(SUnit* left, SUnit* right) const;
1706 };
1707 
1708 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1709 // scheduler.
1710 struct ilp_ls_rr_sort : public queue_sort {
1711  enum {
1712  IsBottomUp = true,
1713  HasReadyFilter = false
1714  };
1715 
1716  RegReductionPQBase *SPQ;
1717 
1718  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1719 
1720  bool isReady(SUnit *SU, unsigned CurCycle) const;
1721 
1722  bool operator()(SUnit* left, SUnit* right) const;
1723 };
1724 
1725 class RegReductionPQBase : public SchedulingPriorityQueue {
1726 protected:
1727  std::vector<SUnit *> Queue;
1728  unsigned CurQueueId = 0;
1729  bool TracksRegPressure;
1730  bool SrcOrder;
1731 
1732  // SUnits - The SUnits for the current graph.
1733  std::vector<SUnit> *SUnits;
1734 
1735  MachineFunction &MF;
1736  const TargetInstrInfo *TII;
1737  const TargetRegisterInfo *TRI;
1738  const TargetLowering *TLI;
1739  ScheduleDAGRRList *scheduleDAG = nullptr;
1740 
1741  // SethiUllmanNumbers - The SethiUllman number for each node.
1742  std::vector<unsigned> SethiUllmanNumbers;
1743 
1744  /// RegPressure - Tracking current reg pressure per register class.
1745  std::vector<unsigned> RegPressure;
1746 
1747  /// RegLimit - Tracking the number of allocatable registers per register
1748  /// class.
1749  std::vector<unsigned> RegLimit;
1750 
1751 public:
1752  RegReductionPQBase(MachineFunction &mf,
1753  bool hasReadyFilter,
1754  bool tracksrp,
1755  bool srcorder,
1756  const TargetInstrInfo *tii,
1757  const TargetRegisterInfo *tri,
1758  const TargetLowering *tli)
1759  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1760  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1761  if (TracksRegPressure) {
1762  unsigned NumRC = TRI->getNumRegClasses();
1763  RegLimit.resize(NumRC);
1764  RegPressure.resize(NumRC);
1765  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1766  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1767  for (const TargetRegisterClass *RC : TRI->regclasses())
1768  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1769  }
1770  }
1771 
1772  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1773  scheduleDAG = scheduleDag;
1774  }
1775 
1776  ScheduleHazardRecognizer* getHazardRec() {
1777  return scheduleDAG->getHazardRec();
1778  }
1779 
1780  void initNodes(std::vector<SUnit> &sunits) override;
1781 
1782  void addNode(const SUnit *SU) override;
1783 
1784  void updateNode(const SUnit *SU) override;
1785 
1786  void releaseState() override {
1787  SUnits = nullptr;
1788  SethiUllmanNumbers.clear();
1789  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1790  }
1791 
1792  unsigned getNodePriority(const SUnit *SU) const;
1793 
1794  unsigned getNodeOrdering(const SUnit *SU) const {
1795  if (!SU->getNode()) return 0;
1796 
1797  return SU->getNode()->getIROrder();
1798  }
1799 
1800  bool empty() const override { return Queue.empty(); }
1801 
1802  void push(SUnit *U) override {
1803  assert(!U->NodeQueueId && "Node in the queue already");
1804  U->NodeQueueId = ++CurQueueId;
1805  Queue.push_back(U);
1806  }
1807 
1808  void remove(SUnit *SU) override {
1809  assert(!Queue.empty() && "Queue is empty!");
1810  assert(SU->NodeQueueId != 0 && "Not in queue!");
1811  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1812  if (I != std::prev(Queue.end()))
1813  std::swap(*I, Queue.back());
1814  Queue.pop_back();
1815  SU->NodeQueueId = 0;
1816  }
1817 
1818  bool tracksRegPressure() const override { return TracksRegPressure; }
1819 
1820  void dumpRegPressure() const;
1821 
1822  bool HighRegPressure(const SUnit *SU) const;
1823 
1824  bool MayReduceRegPressure(SUnit *SU) const;
1825 
1826  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1827 
1828  void scheduledNode(SUnit *SU) override;
1829 
1830  void unscheduledNode(SUnit *SU) override;
1831 
1832 protected:
1833  bool canClobber(const SUnit *SU, const SUnit *Op);
1834  void AddPseudoTwoAddrDeps();
1835  void PrescheduleNodesWithMultipleUses();
1836  void CalculateSethiUllmanNumbers();
1837 };
1838 
1839 template<class SF>
1840 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1841  std::vector<SUnit *>::iterator Best = Q.begin();
1842  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1843  if (Picker(*Best, *I))
1844  Best = I;
1845  SUnit *V = *Best;
1846  if (Best != std::prev(Q.end()))
1847  std::swap(*Best, Q.back());
1848  Q.pop_back();
1849  return V;
1850 }
1851 
1852 template<class SF>
1853 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1854 #ifndef NDEBUG
1855  if (DAG->StressSched) {
1856  reverse_sort<SF> RPicker(Picker);
1857  return popFromQueueImpl(Q, RPicker);
1858  }
1859 #endif
1860  (void)DAG;
1861  return popFromQueueImpl(Q, Picker);
1862 }
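
popFromQueueImpl does a linear scan in which the picker answers "is the second argument preferred over the current best?", then removes the winner with a swap-with-back so no elements have to shift. A minimal standalone sketch of that idiom over plain integers (not the LLVM API) looks like this:

#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

template <class Picker>
int popBest(std::vector<int> &Q, Picker P) {
  assert(!Q.empty() && "queue must be non-empty");
  auto Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (P(*Best, *I))              // P(a, b) == true means b is preferred over a
      Best = I;
  int V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back());    // O(1) removal: move the winner to the back
  Q.pop_back();
  return V;
}

int main() {
  std::vector<int> Q = {3, 9, 1, 7};
  int Top = popBest(Q, [](int a, int b) { return b > a; });  // prefer larger values
  assert(Top == 9 && Q.size() == 3);
  return 0;
}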
1863 
1864 //===----------------------------------------------------------------------===//
1865 // RegReductionPriorityQueue Definition
1866 //===----------------------------------------------------------------------===//
1867 //
1868 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1869 // to reduce register pressure.
1870 //
1871 template<class SF>
1872 class RegReductionPriorityQueue : public RegReductionPQBase {
1873  SF Picker;
1874 
1875 public:
1876  RegReductionPriorityQueue(MachineFunction &mf,
1877  bool tracksrp,
1878  bool srcorder,
1879  const TargetInstrInfo *tii,
1880  const TargetRegisterInfo *tri,
1881  const TargetLowering *tli)
1882  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1883  tii, tri, tli),
1884  Picker(this) {}
1885 
1886  bool isBottomUp() const override { return SF::IsBottomUp; }
1887 
1888  bool isReady(SUnit *U) const override {
1889  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1890  }
1891 
1892  SUnit *pop() override {
1893  if (Queue.empty()) return nullptr;
1894 
1895  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1896  V->NodeQueueId = 0;
1897  return V;
1898  }
1899 
1900 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1901  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1902  // Emulate pop() without clobbering NodeQueueIds.
1903  std::vector<SUnit *> DumpQueue = Queue;
1904  SF DumpPicker = Picker;
1905  while (!DumpQueue.empty()) {
1906  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1907  dbgs() << "Height " << SU->getHeight() << ": ";
1908  DAG->dumpNode(*SU);
1909  }
1910  }
1911 #endif
1912 };
1913 
1914 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1915 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1916 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1917 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1918 
1919 } // end anonymous namespace
1920 
1921 //===----------------------------------------------------------------------===//
1922 // Static Node Priority for Register Pressure Reduction
1923 //===----------------------------------------------------------------------===//
1924 
1925 // Check for special nodes that bypass scheduling heuristics.
1926 // Currently this pushes TokenFactor nodes down, but may be used for other
1927 // pseudo-ops as well.
1928 //
1929 // Return -1 to schedule right above left, 1 for left above right.
1930 // Return 0 if no bias exists.
1931 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1932  bool LSchedLow = left->isScheduleLow;
1933  bool RSchedLow = right->isScheduleLow;
1934  if (LSchedLow != RSchedLow)
1935  return LSchedLow < RSchedLow ? 1 : -1;
1936  return 0;
1937 }
1938 
1939 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1940 /// Smaller number is the higher priority.
1941 static unsigned
1942 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1943  if (SUNumbers[SU->NodeNum] != 0)
1944  return SUNumbers[SU->NodeNum];
1945 
1946  // Use WorkList to avoid stack overflow on excessively large IRs.
1947  struct WorkState {
1948  WorkState(const SUnit *SU) : SU(SU) {}
1949  const SUnit *SU;
1950  unsigned PredsProcessed = 0;
1951  };
1952 
1953  SmallVector<WorkState, 16> WorkList;
1954  WorkList.push_back(SU);
1955  while (!WorkList.empty()) {
1956  auto &Temp = WorkList.back();
1957  auto *TempSU = Temp.SU;
1958  bool AllPredsKnown = true;
1959  // Try to find a non-evaluated pred and push it into the processing stack.
1960  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1961  auto &Pred = TempSU->Preds[P];
1962  if (Pred.isCtrl()) continue; // ignore chain preds
1963  SUnit *PredSU = Pred.getSUnit();
1964  if (SUNumbers[PredSU->NodeNum] == 0) {
1965 #ifndef NDEBUG
1966  // In debug mode, check that we don't have such element in the stack.
1967  for (auto It : WorkList)
1968  assert(It.SU != PredSU && "Trying to push an element twice?");
1969 #endif
1970  // Next time start processing this one starting from the next pred.
1971  Temp.PredsProcessed = P + 1;
1972  WorkList.push_back(PredSU);
1973  AllPredsKnown = false;
1974  break;
1975  }
1976  }
1977 
1978  if (!AllPredsKnown)
1979  continue;
1980 
1981  // Once all preds are known, we can calculate the answer for this one.
1982  unsigned SethiUllmanNumber = 0;
1983  unsigned Extra = 0;
1984  for (const SDep &Pred : TempSU->Preds) {
1985  if (Pred.isCtrl()) continue; // ignore chain preds
1986  SUnit *PredSU = Pred.getSUnit();
1987  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1988  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1989  if (PredSethiUllman > SethiUllmanNumber) {
1990  SethiUllmanNumber = PredSethiUllman;
1991  Extra = 0;
1992  } else if (PredSethiUllman == SethiUllmanNumber)
1993  ++Extra;
1994  }
1995 
1996  SethiUllmanNumber += Extra;
1997  if (SethiUllmanNumber == 0)
1998  SethiUllmanNumber = 1;
1999  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2000  WorkList.pop_back();
2001  }
2002 
2003  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2004  return SUNumbers[SU->NodeNum];
2005 }
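
The recurrence computed here is: a node's Sethi-Ullman number is the maximum over its data predecessors, plus one for each additional predecessor that ties that maximum, with leaves getting 1. A toy, standalone illustration of the same recurrence (plain recursion for brevity, where the real code uses an explicit worklist, and plain vectors rather than the SUnit graph):

#include <cassert>
#include <vector>

// Preds[N] lists N's data predecessors; Num caches computed numbers (0 = unknown).
unsigned sethiUllman(unsigned N, const std::vector<std::vector<unsigned>> &Preds,
                     std::vector<unsigned> &Num) {
  if (Num[N] != 0)
    return Num[N];
  unsigned Max = 0, Extra = 0;
  for (unsigned P : Preds[N]) {
    unsigned PN = sethiUllman(P, Preds, Num);
    if (PN > Max) {
      Max = PN;
      Extra = 0;
    } else if (PN == Max) {
      ++Extra;
    }
  }
  unsigned Result = Max + Extra;
  Num[N] = Result == 0 ? 1 : Result;  // leaves get 1, as in the real code
  return Num[N];
}

int main() {
  // Nodes 0 and 1 are leaves feeding node 2; node 3 has node 2 as its only pred.
  std::vector<std::vector<unsigned>> Preds = {{}, {}, {0, 1}, {2}};
  std::vector<unsigned> Num(Preds.size(), 0);
  assert(sethiUllman(2, Preds, Num) == 2);  // max(1, 1) plus one for the tie
  assert(sethiUllman(3, Preds, Num) == 2);  // inherits 2 from its lone pred
  return 0;
}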
2006 
2007 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2008 /// scheduling units.
2009 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2010  SethiUllmanNumbers.assign(SUnits->size(), 0);
2011 
2012  for (const SUnit &SU : *SUnits)
2013  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2014 }
2015 
2016 void RegReductionPQBase::addNode(const SUnit *SU) {
2017  unsigned SUSize = SethiUllmanNumbers.size();
2018  if (SUnits->size() > SUSize)
2019  SethiUllmanNumbers.resize(SUSize*2, 0);
2020  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2021 }
2022 
2023 void RegReductionPQBase::updateNode(const SUnit *SU) {
2024  SethiUllmanNumbers[SU->NodeNum] = 0;
2025  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2026 }
2027 
2028 // Lower priority means schedule further down. For bottom-up scheduling, lower
2029 // priority SUs are scheduled before higher priority SUs.
2030 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2031  assert(SU->NodeNum < SethiUllmanNumbers.size());
2032  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2033  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2034  // CopyToReg should be close to its uses to facilitate coalescing and
2035  // avoid spilling.
2036  return 0;
2037  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2038  Opc == TargetOpcode::SUBREG_TO_REG ||
2039  Opc == TargetOpcode::INSERT_SUBREG)
2040  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2041  // close to their uses to facilitate coalescing.
2042  return 0;
2043  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2044  // If SU does not have a register use, i.e. it doesn't produce a value
2045  // that would be consumed (e.g. store), then it terminates a chain of
2046  // computation. Give it a large SethiUllman number so it will be
 2047  // scheduled right before its predecessors so that it doesn't lengthen
 2048  // their live ranges.
2049  return 0xffff;
2050  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2051  // If SU does not have a register def, schedule it close to its uses
2052  // because it does not lengthen any live ranges.
2053  return 0;
2054 #if 1
2055  return SethiUllmanNumbers[SU->NodeNum];
2056 #else
2057  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2058  if (SU->isCallOp) {
2059  // FIXME: This assumes all of the defs are used as call operands.
2060  int NP = (int)Priority - SU->getNode()->getNumValues();
2061  return (NP > 0) ? NP : 0;
2062  }
2063  return Priority;
2064 #endif
2065 }
2066 
2067 //===----------------------------------------------------------------------===//
2068 // Register Pressure Tracking
2069 //===----------------------------------------------------------------------===//
2070 
2071 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2072 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2073  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2074  unsigned Id = RC->getID();
2075  unsigned RP = RegPressure[Id];
2076  if (!RP) continue;
2077  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2078  << RegLimit[Id] << '\n');
2079  }
2080 }
2081 #endif
2082 
2083 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2084  if (!TLI)
2085  return false;
2086 
2087  for (const SDep &Pred : SU->Preds) {
2088  if (Pred.isCtrl())
2089  continue;
2090  SUnit *PredSU = Pred.getSUnit();
2091  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2092  // to cover the number of registers defined (they are all live).
2093  if (PredSU->NumRegDefsLeft == 0) {
2094  continue;
2095  }
2096  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2097  RegDefPos.IsValid(); RegDefPos.Advance()) {
2098  unsigned RCId, Cost;
2099  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2100 
2101  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2102  return true;
2103  }
2104  }
2105  return false;
2106 }
2107 
2108 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2109  const SDNode *N = SU->getNode();
2110 
2111  if (!N->isMachineOpcode() || !SU->NumSuccs)
2112  return false;
2113 
2114  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2115  for (unsigned i = 0; i != NumDefs; ++i) {
2116  MVT VT = N->getSimpleValueType(i);
2117  if (!N->hasAnyUseOfValue(i))
2118  continue;
2119  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2120  if (RegPressure[RCId] >= RegLimit[RCId])
2121  return true;
2122  }
2123  return false;
2124 }
2125 
 2126 // Compute the register pressure contribution by this instruction by counting up
2127 // for uses that are not live and down for defs. Only count register classes
2128 // that are already under high pressure. As a side effect, compute the number of
2129 // uses of registers that are already live.
2130 //
2131 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2132 // so could probably be factored.
2133 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2134  LiveUses = 0;
2135  int PDiff = 0;
2136  for (const SDep &Pred : SU->Preds) {
2137  if (Pred.isCtrl())
2138  continue;
2139  SUnit *PredSU = Pred.getSUnit();
2140  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2141  // to cover the number of registers defined (they are all live).
2142  if (PredSU->NumRegDefsLeft == 0) {
2143  if (PredSU->getNode()->isMachineOpcode())
2144  ++LiveUses;
2145  continue;
2146  }
2147  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2148  RegDefPos.IsValid(); RegDefPos.Advance()) {
2149  MVT VT = RegDefPos.GetValue();
2150  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2151  if (RegPressure[RCId] >= RegLimit[RCId])
2152  ++PDiff;
2153  }
2154  }
2155  const SDNode *N = SU->getNode();
2156 
2157  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2158  return PDiff;
2159 
2160  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2161  for (unsigned i = 0; i != NumDefs; ++i) {
2162  MVT VT = N->getSimpleValueType(i);
2163  if (!N->hasAnyUseOfValue(i))
2164  continue;
2165  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2166  if (RegPressure[RCId] >= RegLimit[RCId])
2167  --PDiff;
2168  }
2169  return PDiff;
2170 }
2171 
2172 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2173  if (!TracksRegPressure)
2174  return;
2175 
2176  if (!SU->getNode())
2177  return;
2178 
2179  for (const SDep &Pred : SU->Preds) {
2180  if (Pred.isCtrl())
2181  continue;
2182  SUnit *PredSU = Pred.getSUnit();
2183  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2184  // to cover the number of registers defined (they are all live).
2185  if (PredSU->NumRegDefsLeft == 0) {
2186  continue;
2187  }
2188  // FIXME: The ScheduleDAG currently loses information about which of a
2189  // node's values is consumed by each dependence. Consequently, if the node
2190  // defines multiple register classes, we don't know which to pressurize
2191  // here. Instead the following loop consumes the register defs in an
2192  // arbitrary order. At least it handles the common case of clustered loads
2193  // to the same class. For precise liveness, each SDep needs to indicate the
2194  // result number. But that tightly couples the ScheduleDAG with the
2195  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2196  // value type or register class to SDep.
2197  //
2198  // The most important aspect of register tracking is balancing the increase
2199  // here with the reduction further below. Note that this SU may use multiple
 2200  // defs in PredSU. They can't be determined here, but we've already
2201  // compensated by reducing NumRegDefsLeft in PredSU during
2202  // ScheduleDAGSDNodes::AddSchedEdges.
2203  --PredSU->NumRegDefsLeft;
2204  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2205  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2206  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2207  if (SkipRegDefs)
2208  continue;
2209 
2210  unsigned RCId, Cost;
2211  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2212  RegPressure[RCId] += Cost;
2213  break;
2214  }
2215  }
2216 
2217  // We should have this assert, but there may be dead SDNodes that never
2218  // materialize as SUnits, so they don't appear to generate liveness.
2219  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2220  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2221  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2222  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2223  if (SkipRegDefs > 0)
2224  continue;
2225  unsigned RCId, Cost;
2226  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2227  if (RegPressure[RCId] < Cost) {
2228  // Register pressure tracking is imprecise. This can happen. But we try
2229  // hard not to let it happen because it likely results in poor scheduling.
2230  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2231  << ") has too many regdefs\n");
2232  RegPressure[RCId] = 0;
2233  }
2234  else {
2235  RegPressure[RCId] -= Cost;
2236  }
2237  }
2238  LLVM_DEBUG(dumpRegPressure());
2239 }
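
The bookkeeping here reduces to per-register-class counters: pressure rises when scheduling this SU exposes a predecessor's def as live, and falls when this SU's own defs have been consumed, clamping at zero because the tracking is imprecise. A minimal sketch of that pattern (hypothetical class ids and costs, not LLVM's tracker or GetCostForDef):

#include <cassert>
#include <vector>

struct PressureTracker {
  std::vector<unsigned> Pressure;  // indexed by register-class id

  explicit PressureTracker(unsigned NumClasses) : Pressure(NumClasses, 0) {}

  void defBecomesLive(unsigned RCId, unsigned Cost) { Pressure[RCId] += Cost; }

  void defConsumed(unsigned RCId, unsigned Cost) {
    // Mirror the clamp in scheduledNode(): never let imprecision go negative.
    Pressure[RCId] = Pressure[RCId] < Cost ? 0 : Pressure[RCId] - Cost;
  }
};

int main() {
  PressureTracker PT(/*NumClasses=*/2);
  PT.defBecomesLive(/*RCId=*/0, /*Cost=*/1);  // a def in class 0 goes live
  PT.defBecomesLive(0, 1);
  PT.defConsumed(0, 1);                       // one def's last use scheduled
  assert(PT.Pressure[0] == 1);
  PT.defConsumed(0, 5);                       // over-decrement clamps to zero
  assert(PT.Pressure[0] == 0);
  return 0;
}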
2240 
2241 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2242  if (!TracksRegPressure)
2243  return;
2244 
2245  const SDNode *N = SU->getNode();
2246  if (!N) return;
2247 
2248  if (!N->isMachineOpcode()) {
2249  if (N->getOpcode() != ISD::CopyToReg)
2250  return;
2251  } else {
2252  unsigned Opc = N->getMachineOpcode();
2253  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2254  Opc == TargetOpcode::INSERT_SUBREG ||
2255  Opc == TargetOpcode::SUBREG_TO_REG ||
2256  Opc == TargetOpcode::REG_SEQUENCE ||
2257  Opc == TargetOpcode::IMPLICIT_DEF)
2258  return;
2259  }
2260 
2261  for (const SDep &Pred : SU->Preds) {
2262  if (Pred.isCtrl())
2263  continue;
2264  SUnit *PredSU = Pred.getSUnit();
2265  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2266  // counts data deps.
2267  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2268  continue;
2269  const SDNode *PN = PredSU->getNode();
2270  if (!PN->isMachineOpcode()) {
2271  if (PN->getOpcode() == ISD::CopyFromReg) {
2272  MVT VT = PN->getSimpleValueType(0);
2273  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2274  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2275  }
2276  continue;
2277  }
2278  unsigned POpc = PN->getMachineOpcode();
2279  if (POpc == TargetOpcode::IMPLICIT_DEF)
2280  continue;
2281  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2282  POpc == TargetOpcode::INSERT_SUBREG ||
2283  POpc == TargetOpcode::SUBREG_TO_REG) {
2284  MVT VT = PN->getSimpleValueType(0);
2285  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2286  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2287  continue;
2288  }
2289  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2290  for (unsigned i = 0; i != NumDefs; ++i) {
2291  MVT VT = PN->getSimpleValueType(i);
2292  if (!PN->hasAnyUseOfValue(i))
2293  continue;
2294  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2295  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2296  // Register pressure tracking is imprecise. This can happen.
2297  RegPressure[RCId] = 0;
2298  else
2299  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2300  }
2301  }
2302 
2303  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2304  // may transfer data dependencies to CopyToReg.
2305  if (SU->NumSuccs && N->isMachineOpcode()) {
2306  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2307  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2308  MVT VT = N->getSimpleValueType(i);
2309  if (VT == MVT::Glue || VT == MVT::Other)
2310  continue;
2311  if (!N->hasAnyUseOfValue(i))
2312  continue;
2313  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2314  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2315  }
2316  }
2317 
2318  LLVM_DEBUG(dumpRegPressure());
2319 }
2320 
2321 //===----------------------------------------------------------------------===//
2322 // Dynamic Node Priority for Register Pressure Reduction
2323 //===----------------------------------------------------------------------===//
2324 
2325 /// closestSucc - Returns the scheduled cycle of the successor which is
2326 /// closest to the current cycle.
2327 static unsigned closestSucc(const SUnit *SU) {
2328  unsigned MaxHeight = 0;
2329  for (const SDep &Succ : SU->Succs) {
2330  if (Succ.isCtrl()) continue; // ignore chain succs
2331  unsigned Height = Succ.getSUnit()->getHeight();
 2332  // If there are a bunch of CopyToRegs stacked up, they should be considered
2333  // to be at the same position.
2334  if (Succ.getSUnit()->getNode() &&
2335  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2336  Height = closestSucc(Succ.getSUnit())+1;
2337  if (Height > MaxHeight)
2338  MaxHeight = Height;
2339  }
2340  return MaxHeight;
2341 }
2342 
 2343 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2344 /// for scratch registers, i.e. number of data dependencies.
2345 static unsigned calcMaxScratches(const SUnit *SU) {
2346  unsigned Scratches = 0;
2347  for (const SDep &Pred : SU->Preds) {
2348  if (Pred.isCtrl()) continue; // ignore chain preds
2349  Scratches++;
2350  }
2351  return Scratches;
2352 }
2353 
2354 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2355 /// CopyFromReg from a virtual register.
2356 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2357  bool RetVal = false;
2358  for (const SDep &Pred : SU->Preds) {
2359  if (Pred.isCtrl()) continue;
2360  const SUnit *PredSU = Pred.getSUnit();
2361  if (PredSU->getNode() &&
2362  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2363  unsigned Reg =
2364  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2365  if (Register::isVirtualRegister(Reg)) {
2366  RetVal = true;
2367  continue;
2368  }
2369  }
2370  return false;
2371  }
2372  return RetVal;
2373 }
2374 
2375 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2376 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2377 /// it has no other use. It should be scheduled closer to the terminator.
2378 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2379  bool RetVal = false;
2380  for (const SDep &Succ : SU->Succs) {
2381  if (Succ.isCtrl()) continue;
2382  const SUnit *SuccSU = Succ.getSUnit();
2383  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2384  unsigned Reg =
2385  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2386  if (Register::isVirtualRegister(Reg)) {
2387  RetVal = true;
2388  continue;
2389  }
2390  }
2391  return false;
2392  }
2393  return RetVal;
2394 }
2395 
2396 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2397 // set isVRegCycle for its CopyFromReg operands.
2398 //
2399 // This is only relevant for single-block loops, in which case the VRegCycle
2400 // node is likely an induction variable in which the operand and target virtual
2401 // registers should be coalesced (e.g. pre/post increment values). Setting the
2402 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2403 // CopyFromReg so that this node becomes the virtual register "kill". This
2404 // avoids interference between the values live in and out of the block and
2405 // eliminates a copy inside the loop.
2406 static void initVRegCycle(SUnit *SU) {
 2407  if (DisableSchedVRcycle)
 2408  return;
2409 
2410  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2411  return;
2412 
2413  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2414 
2415  SU->isVRegCycle = true;
2416 
2417  for (const SDep &Pred : SU->Preds) {
2418  if (Pred.isCtrl()) continue;
2419  Pred.getSUnit()->isVRegCycle = true;
2420  }
2421 }
2422 
2423 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2424 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2425 static void resetVRegCycle(SUnit *SU) {
2426  if (!SU->isVRegCycle)
2427  return;
2428 
2429  for (const SDep &Pred : SU->Preds) {
2430  if (Pred.isCtrl()) continue; // ignore chain preds
2431  SUnit *PredSU = Pred.getSUnit();
2432  if (PredSU->isVRegCycle) {
2433  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2434  "VRegCycle def must be CopyFromReg");
2435  Pred.getSUnit()->isVRegCycle = false;
2436  }
2437  }
2438 }
2439 
2440 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2441 // means a node that defines the VRegCycle has not been scheduled yet.
2442 static bool hasVRegCycleUse(const SUnit *SU) {
2443  // If this SU also defines the VReg, don't hoist it as a "use".
2444  if (SU->isVRegCycle)
2445  return false;
2446 
2447  for (const SDep &Pred : SU->Preds) {
2448  if (Pred.isCtrl()) continue; // ignore chain preds
2449  if (Pred.getSUnit()->isVRegCycle &&
2450  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2451  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2452  return true;
2453  }
2454  }
2455  return false;
2456 }
2457 
2458 // Check for either a dependence (latency) or resource (hazard) stall.
2459 //
2460 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2461 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2462  if ((int)SPQ->getCurCycle() < Height) return true;
2463  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2464  != ScheduleHazardRecognizer::NoHazard)
 2465  return true;
2466  return false;
2467 }
2468 
2469 // Return -1 if left has higher priority, 1 if right has higher priority.
2470 // Return 0 if latency-based priority is equivalent.
2471 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2472  RegReductionPQBase *SPQ) {
2473  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2474  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2475  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2476  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2477  int LHeight = (int)left->getHeight() + LPenalty;
2478  int RHeight = (int)right->getHeight() + RPenalty;
2479 
2480  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2481  BUHasStall(left, LHeight, SPQ);
2482  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2483  BUHasStall(right, RHeight, SPQ);
2484 
 2485  // If scheduling one of the nodes will cause a pipeline stall, delay it.
 2486  // If scheduling both of them would cause a stall, sort them according to
 2487  // their height.
2488  if (LStall) {
2489  if (!RStall)
2490  return 1;
2491  if (LHeight != RHeight)
2492  return LHeight > RHeight ? 1 : -1;
2493  } else if (RStall)
2494  return -1;
2495 
2496  // If either node is scheduling for latency, sort them by height/depth
2497  // and latency.
2498  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2499  right->SchedulingPref == Sched::ILP)) {
2500  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2501  // is enabled, grouping instructions by cycle, then its height is already
2502  // covered so only its depth matters. We also reach this point if both stall
2503  // but have the same height.
2504  if (!SPQ->getHazardRec()->isEnabled()) {
2505  if (LHeight != RHeight)
2506  return LHeight > RHeight ? 1 : -1;
2507  }
2508  int LDepth = left->getDepth() - LPenalty;
2509  int RDepth = right->getDepth() - RPenalty;
2510  if (LDepth != RDepth) {
2511  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2512  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2513  << ") depth " << RDepth << "\n");
2514  return LDepth < RDepth ? 1 : -1;
2515  }
2516  if (left->Latency != right->Latency)
2517  return left->Latency > right->Latency ? 1 : -1;
2518  }
2519  return 0;
2520 }
2521 
2522 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2523  // Schedule physical register definitions close to their use. This is
2524  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2525  // long as shortening physreg live ranges is generally good, we can defer
2526  // creating a subtarget hook.
2527  if (!DisableSchedPhysRegJoin) {
2528  bool LHasPhysReg = left->hasPhysRegDefs;
2529  bool RHasPhysReg = right->hasPhysRegDefs;
2530  if (LHasPhysReg != RHasPhysReg) {
2531  #ifndef NDEBUG
2532  static const char *const PhysRegMsg[] = { " has no physreg",
2533  " defines a physreg" };
2534  #endif
2535  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2536  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2537  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2538  return LHasPhysReg < RHasPhysReg;
2539  }
2540  }
2541 
 2542  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2543  unsigned LPriority = SPQ->getNodePriority(left);
2544  unsigned RPriority = SPQ->getNodePriority(right);
2545 
2546  // Be really careful about hoisting call operands above previous calls.
 2547  // Only allow it if it would reduce register pressure.
2548  if (left->isCall && right->isCallOp) {
2549  unsigned RNumVals = right->getNode()->getNumValues();
2550  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2551  }
2552  if (right->isCall && left->isCallOp) {
2553  unsigned LNumVals = left->getNode()->getNumValues();
2554  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2555  }
2556 
2557  if (LPriority != RPriority)
2558  return LPriority > RPriority;
2559 
 2560  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
 2561  // the same, then keep source order.
2562  if (left->isCall || right->isCall) {
2563  unsigned LOrder = SPQ->getNodeOrdering(left);
2564  unsigned ROrder = SPQ->getNodeOrdering(right);
2565 
2566  // Prefer an ordering where the lower the non-zero order number, the higher
2567  // the preference.
2568  if ((LOrder || ROrder) && LOrder != ROrder)
2569  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2570  }
2571 
2572  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2573  // e.g.
2574  // t1 = op t2, c1
2575  // t3 = op t4, c2
2576  //
2577  // and the following instructions are both ready.
2578  // t2 = op c3
2579  // t4 = op c4
2580  //
2581  // Then schedule t2 = op first.
2582  // i.e.
2583  // t4 = op c4
2584  // t2 = op c3
2585  // t1 = op t2, c1
2586  // t3 = op t4, c2
2587  //
2588  // This creates more short live intervals.
2589  unsigned LDist = closestSucc(left);
2590  unsigned RDist = closestSucc(right);
2591  if (LDist != RDist)
2592  return LDist < RDist;
2593 
 2594  // How many registers become live when the node is scheduled.
2595  unsigned LScratch = calcMaxScratches(left);
2596  unsigned RScratch = calcMaxScratches(right);
2597  if (LScratch != RScratch)
2598  return LScratch > RScratch;
2599 
2600  // Comparing latency against a call makes little sense unless the node
2601  // is register pressure-neutral.
2602  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2603  return (left->NodeQueueId > right->NodeQueueId);
2604 
2605  // Do not compare latencies when one or both of the nodes are calls.
2606  if (!DisableSchedCycles &&
2607  !(left->isCall || right->isCall)) {
2608  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2609  if (result != 0)
2610  return result > 0;
2611  }
2612  else {
2613  if (left->getHeight() != right->getHeight())
2614  return left->getHeight() > right->getHeight();
2615 
2616  if (left->getDepth() != right->getDepth())
2617  return left->getDepth() < right->getDepth();
2618  }
2619 
2620  assert(left->NodeQueueId && right->NodeQueueId &&
2621  "NodeQueueId cannot be zero");
2622  return (left->NodeQueueId > right->NodeQueueId);
2623 }
2624 
2625 // Bottom up
2626 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2627  if (int res = checkSpecialNodes(left, right))
2628  return res > 0;
2629 
2630  return BURRSort(left, right, SPQ);
2631 }
2632 
2633 // Source order, otherwise bottom up.
2634 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2635  if (int res = checkSpecialNodes(left, right))
2636  return res > 0;
2637 
2638  unsigned LOrder = SPQ->getNodeOrdering(left);
2639  unsigned ROrder = SPQ->getNodeOrdering(right);
2640 
2641  // Prefer an ordering where the lower the non-zero order number, the higher
2642  // the preference.
2643  if ((LOrder || ROrder) && LOrder != ROrder)
2644  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2645 
2646  return BURRSort(left, right, SPQ);
2647 }
2648 
2649 // If the time between now and when the instruction will be ready can cover
2650 // the spill code, then avoid adding it to the ready queue. This gives long
2651 // stalls highest priority and allows hoisting across calls. It should also
2652 // speed up processing the available queue.
2653 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2654  static const unsigned ReadyDelay = 3;
2655 
2656  if (SPQ->MayReduceRegPressure(SU)) return true;
2657 
2658  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2659 
2660  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
 2661  != ScheduleHazardRecognizer::NoHazard)
 2662  return false;
2663 
2664  return true;
2665 }
2666 
2667 // Return true if right should be scheduled with higher priority than left.
2668 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2669  if (int res = checkSpecialNodes(left, right))
2670  return res > 0;
2671 
2672  if (left->isCall || right->isCall)
2673  // No way to compute latency of calls.
2674  return BURRSort(left, right, SPQ);
2675 
2676  bool LHigh = SPQ->HighRegPressure(left);
2677  bool RHigh = SPQ->HighRegPressure(right);
2678  // Avoid causing spills. If register pressure is high, schedule for
2679  // register pressure reduction.
2680  if (LHigh && !RHigh) {
2681  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2682  << right->NodeNum << ")\n");
2683  return true;
2684  }
2685  else if (!LHigh && RHigh) {
2686  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2687  << left->NodeNum << ")\n");
2688  return false;
2689  }
2690  if (!LHigh && !RHigh) {
2691  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2692  if (result != 0)
2693  return result > 0;
2694  }
2695  return BURRSort(left, right, SPQ);
2696 }
2697 
2698 // Schedule as many instructions in each cycle as possible. So don't make an
2699 // instruction available unless it is ready in the current cycle.
2700 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2701  if (SU->getHeight() > CurCycle) return false;
2702 
2703  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2704  != ScheduleHazardRecognizer::NoHazard)
 2705  return false;
2706 
2707  return true;
2708 }
2709 
2710 static bool canEnableCoalescing(SUnit *SU) {
2711  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2712  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2713  // CopyToReg should be close to its uses to facilitate coalescing and
2714  // avoid spilling.
2715  return true;
2716 
2717  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2718  Opc == TargetOpcode::SUBREG_TO_REG ||
2719  Opc == TargetOpcode::INSERT_SUBREG)
2720  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2721  // close to their uses to facilitate coalescing.
2722  return true;
2723 
2724  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2725  // If SU does not have a register def, schedule it close to its uses
2726  // because it does not lengthen any live ranges.
2727  return true;
2728 
2729  return false;
2730 }
2731 
2732 // list-ilp is currently an experimental scheduler that allows various
2733 // heuristics to be enabled prior to the normal register reduction logic.
2734 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2735  if (int res = checkSpecialNodes(left, right))
2736  return res > 0;
2737 
2738  if (left->isCall || right->isCall)
2739  // No way to compute latency of calls.
2740  return BURRSort(left, right, SPQ);
2741 
2742  unsigned LLiveUses = 0, RLiveUses = 0;
2743  int LPDiff = 0, RPDiff = 0;
 2744  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
 2745  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2746  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2747  }
2748  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2749  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2750  << "): " << LPDiff << " != SU(" << right->NodeNum
2751  << "): " << RPDiff << "\n");
2752  return LPDiff > RPDiff;
2753  }
2754 
2755  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2756  bool LReduce = canEnableCoalescing(left);
2757  bool RReduce = canEnableCoalescing(right);
2758  if (LReduce && !RReduce) return false;
2759  if (RReduce && !LReduce) return true;
2760  }
2761 
2762  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2763  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2764  << " != SU(" << right->NodeNum << "): " << RLiveUses
2765  << "\n");
2766  return LLiveUses < RLiveUses;
2767  }
2768 
2769  if (!DisableSchedStalls) {
2770  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2771  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2772  if (LStall != RStall)
2773  return left->getHeight() > right->getHeight();
2774  }
2775 
2776  if (!DisableSchedCriticalPath) {
2777  int spread = (int)left->getDepth() - (int)right->getDepth();
2778  if (std::abs(spread) > MaxReorderWindow) {
2779  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2780  << left->getDepth() << " != SU(" << right->NodeNum
2781  << "): " << right->getDepth() << "\n");
2782  return left->getDepth() < right->getDepth();
2783  }
2784  }
2785 
2786  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2787  int spread = (int)left->getHeight() - (int)right->getHeight();
2788  if (std::abs(spread) > MaxReorderWindow)
2789  return left->getHeight() > right->getHeight();
2790  }
2791 
2792  return BURRSort(left, right, SPQ);
2793 }
2794 
2795 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2796  SUnits = &sunits;
2797  // Add pseudo dependency edges for two-address nodes.
2798  if (!Disable2AddrHack)
2799  AddPseudoTwoAddrDeps();
2800  // Reroute edges to nodes with multiple uses.
2801  if (!TracksRegPressure && !SrcOrder)
2802  PrescheduleNodesWithMultipleUses();
2803  // Calculate node priorities.
2804  CalculateSethiUllmanNumbers();
2805 
2806  // For single block loops, mark nodes that look like canonical IV increments.
2807  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2808  for (SUnit &SU : sunits)
2809  initVRegCycle(&SU);
2810 }
2811 
2812 //===----------------------------------------------------------------------===//
2813 // Preschedule for Register Pressure
2814 //===----------------------------------------------------------------------===//
2815 
2816 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2817  if (SU->isTwoAddress) {
2818  unsigned Opc = SU->getNode()->getMachineOpcode();
2819  const MCInstrDesc &MCID = TII->get(Opc);
2820  unsigned NumRes = MCID.getNumDefs();
2821  unsigned NumOps = MCID.getNumOperands() - NumRes;
2822  for (unsigned i = 0; i != NumOps; ++i) {
2823  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2824  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2825  if (DU->getNodeId() != -1 &&
2826  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2827  return true;
2828  }
2829  }
2830  }
2831  return false;
2832 }
2833 
 2834 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
 2835 /// successors' explicit physregs whose definition can reach DepSU.
2836 /// i.e. DepSU should not be scheduled above SU.
2837 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2838  ScheduleDAGRRList *scheduleDAG,
2839  const TargetInstrInfo *TII,
2840  const TargetRegisterInfo *TRI) {
2841  const MCPhysReg *ImpDefs
2842  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2843  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2844  if(!ImpDefs && !RegMask)
2845  return false;
2846 
2847  for (const SDep &Succ : SU->Succs) {
2848  SUnit *SuccSU = Succ.getSUnit();
2849  for (const SDep &SuccPred : SuccSU->Preds) {
2850  if (!SuccPred.isAssignedRegDep())
2851  continue;
2852 
2853  if (RegMask &&
2854  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2855  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2856  return true;
2857 
2858  if (ImpDefs)
2859  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2860  // Return true if SU clobbers this physical register use and the
2861  // definition of the register reaches from DepSU. IsReachable queries
2862  // a topological forward sort of the DAG (following the successors).
2863  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2864  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2865  return true;
2866  }
2867  }
2868  return false;
2869 }
2870 
2871 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2872 /// physical register defs.
2873 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2874  const TargetInstrInfo *TII,
2875  const TargetRegisterInfo *TRI) {
2876  SDNode *N = SuccSU->getNode();
2877  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2878  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2879  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2880  for (const SDNode *SUNode = SU->getNode(); SUNode;
2881  SUNode = SUNode->getGluedNode()) {
2882  if (!SUNode->isMachineOpcode())
2883  continue;
2884  const MCPhysReg *SUImpDefs =
2885  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2886  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2887  if (!SUImpDefs && !SURegMask)
2888  continue;
2889  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2890  MVT VT = N->getSimpleValueType(i);
2891  if (VT == MVT::Glue || VT == MVT::Other)
2892  continue;
2893  if (!N->hasAnyUseOfValue(i))
2894  continue;
2895  unsigned Reg = ImpDefs[i - NumDefs];
2896  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2897  return true;
2898  if (!SUImpDefs)
2899  continue;
2900  for (;*SUImpDefs; ++SUImpDefs) {
2901  unsigned SUReg = *SUImpDefs;
2902  if (TRI->regsOverlap(Reg, SUReg))
2903  return true;
2904  }
2905  }
2906  }
2907  return false;
2908 }
2909 
2910 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2911 /// are not handled well by the general register pressure reduction
2912 /// heuristics. When presented with code like this:
2913 ///
 2914 ///      N
 2915 ///    / |
 2916 ///   /  |
 2917 ///  U  store
 2918 ///  |
 2919 /// ...
2920 ///
2921 /// the heuristics tend to push the store up, but since the
2922 /// operand of the store has another use (U), this would increase
2923 /// the length of that other use (the U->N edge).
2924 ///
2925 /// This function transforms code like the above to route U's
2926 /// dependence through the store when possible, like this:
2927 ///
 2928 ///      N
 2929 ///      ||
 2930 ///      ||
 2931 ///     store
 2932 ///       |
 2933 ///       U
 2934 ///       |
 2935 ///      ...
2936 ///
2937 /// This results in the store being scheduled immediately
2938 /// after N, which shortens the U->N live range, reducing
2939 /// register pressure.
2940 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2941  // Visit all the nodes in topological order, working top-down.
2942  for (SUnit &SU : *SUnits) {
2943  // For now, only look at nodes with no data successors, such as stores.
2944  // These are especially important, due to the heuristics in
2945  // getNodePriority for nodes with no data successors.
2946  if (SU.NumSuccs != 0)
2947  continue;
2948  // For now, only look at nodes with exactly one data predecessor.
2949  if (SU.NumPreds != 1)
2950  continue;
2951  // Avoid prescheduling copies to virtual registers, which don't behave
2952  // like other nodes from the perspective of scheduling heuristics.
2953  if (SDNode *N = SU.getNode())
2954  if (N->getOpcode() == ISD::CopyToReg &&
 2955  Register::isVirtualRegister(
 2956  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2957  continue;
2958 
2959  SDNode *PredFrameSetup = nullptr;
2960  for (const SDep &Pred : SU.Preds)
2961  if (Pred.isCtrl() && Pred.getSUnit()) {
 2962  // Find a predecessor that is not a data dependence.
2963  SDNode *PredND = Pred.getSUnit()->getNode();
2964 
 2965  // If PredND is a FrameSetup, we should not pre-schedule the node,
 2966  // or else, when scheduling bottom up, ADJCALLSTACKDOWN and
 2967  // ADJCALLSTACKUP may hold the CallResource too long and prevent
 2968  // other calls from being scheduled. If there's no other available
 2969  // node to schedule, the scheduler will try to rename the register by
 2970  // creating a copy to avoid the conflict, which will fail because
 2971  // CallResource is not a real physical register.
2972  if (PredND && PredND->isMachineOpcode() &&
2973  (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2974  PredFrameSetup = PredND;
2975  break;
2976  }
2977  }
 2978  // Skip the node that has a FrameSetup parent.
2979  if (PredFrameSetup != nullptr)
2980  continue;
2981 
2982  // Locate the single data predecessor.
2983  SUnit *PredSU = nullptr;
2984  for (const SDep &Pred : SU.Preds)
2985  if (!Pred.isCtrl()) {
2986  PredSU = Pred.getSUnit();
2987  break;
2988  }
2989  assert(PredSU);
2990 
2991  // Don't rewrite edges that carry physregs, because that requires additional
2992  // support infrastructure.
2993  if (PredSU->hasPhysRegDefs)
2994  continue;
2995  // Short-circuit the case where SU is PredSU's only data successor.
2996  if (PredSU->NumSuccs == 1)
2997  continue;
2998  // Avoid prescheduling to copies from virtual registers, which don't behave
2999  // like other nodes from the perspective of scheduling heuristics.
3000  if (SDNode *N = SU.getNode())
3001  if (N->getOpcode() == ISD::CopyFromReg &&
 3002  Register::isVirtualRegister(
 3003  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
3004  continue;
3005 
3006  // Perform checks on the successors of PredSU.
3007  for (const SDep &PredSucc : PredSU->Succs) {
3008  SUnit *PredSuccSU = PredSucc.getSUnit();
3009  if (PredSuccSU == &SU) continue;
3010  // If PredSU has another successor with no data successors, for
3011  // now don't attempt to choose either over the other.
3012  if (PredSuccSU->NumSuccs == 0)
3013  goto outer_loop_continue;
3014  // Don't break physical register dependencies.
3015  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3016  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3017  goto outer_loop_continue;
3018  // Don't introduce graph cycles.
3019  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3020  goto outer_loop_continue;
3021  }
3022 
3023  // Ok, the transformation is safe and the heuristics suggest it is
3024  // profitable. Update the graph.
3025  LLVM_DEBUG(
3026  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3027  << PredSU->NodeNum
3028  << " to guide scheduling in the presence of multiple uses\n");
3029  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3030  SDep Edge = PredSU->Succs[i];
3031  assert(!Edge.isAssignedRegDep());
3032  SUnit *SuccSU = Edge.getSUnit();
3033  if (SuccSU != &SU) {
3034  Edge.setSUnit(PredSU);
3035  scheduleDAG->RemovePred(SuccSU, Edge);
3036  scheduleDAG->AddPredQueued(&SU, Edge);
3037  Edge.setSUnit(&SU);
3038  scheduleDAG->AddPredQueued(SuccSU, Edge);
3039  --i;
3040  }
3041  }
3042  outer_loop_continue:;
3043  }
3044 }
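
The edge rewriting in the loop above amounts to detaching every successor of PredSU other than SU and reattaching it below SU, so SU becomes the only direct consumer of PredSU. A toy sketch over a plain adjacency list (not the ScheduleDAG API, and without the legality checks performed above) showing the same rerouting:

#include <algorithm>
#include <cassert>
#include <vector>

using Graph = std::vector<std::vector<unsigned>>;  // Succs[n] = successors of n

// Detach every successor of Pred other than SU and reattach it below SU, so SU
// is the only node that directly consumes Pred.
void rerouteThrough(Graph &Succs, unsigned Pred, unsigned SU) {
  std::vector<unsigned> Moved;
  for (unsigned S : Succs[Pred])
    if (S != SU)
      Moved.push_back(S);
  Succs[Pred].assign(1, SU);  // Pred now feeds only SU
  for (unsigned S : Moved)
    if (std::find(Succs[SU].begin(), Succs[SU].end(), S) == Succs[SU].end())
      Succs[SU].push_back(S);  // former siblings now depend on SU instead
}

int main() {
  // Node 0 plays the role of N; node 1 is the other use U; node 2 is the store.
  Graph Succs = {{1, 2}, {}, {}};
  rerouteThrough(Succs, /*Pred=*/0, /*SU=*/2);
  assert(Succs[0] == std::vector<unsigned>{2});   // N -> store only
  assert(Succs[2] == std::vector<unsigned>{1});   // store -> U
  return 0;
}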
3045 
3046 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
 3047 /// it as a def&use operand, add a pseudo control edge from it to the other
3048 /// node (if it won't create a cycle) so the two-address one will be scheduled
3049 /// first (lower in the schedule). If both nodes are two-address, favor the
3050 /// one that has a CopyToReg use (more likely to be a loop induction update).
3051 /// If both are two-address, but one is commutable while the other is not
3052 /// commutable, favor the one that's not commutable.
3053 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3054  for (SUnit &SU : *SUnits) {
3055  if (!SU.isTwoAddress)
3056  continue;
3057 
3058  SDNode *Node = SU.getNode();
3059  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3060  continue;
3061 
3062  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3063  unsigned Opc = Node->getMachineOpcode();
3064  const MCInstrDesc &MCID = TII->get(Opc);
3065  unsigned NumRes = MCID.getNumDefs();
3066  unsigned NumOps = MCID.getNumOperands() - NumRes;
3067  for (unsigned j = 0; j != NumOps; ++j) {
3068  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3069  continue;
3070  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3071  if (DU->getNodeId() == -1)
3072  continue;
3073  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3074  if (!DUSU)
3075  continue;
3076  for (const SDep &Succ : DUSU->Succs) {
3077  if (Succ.isCtrl())
3078  continue;
3079  SUnit *SuccSU = Succ.getSUnit();
3080  if (SuccSU == &SU)
3081  continue;
3082  // Be conservative. Ignore if nodes aren't at roughly the same
3083  // depth and height.
3084  if (SuccSU->getHeight() < SU.getHeight() &&
3085  (SU.getHeight() - SuccSU->getHeight()) > 1)
3086  continue;
3087  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3088  // constrains whatever is using the copy, instead of the copy
3089  // itself. In the case that the copy is coalesced, this
 3090  // preserves the intent of the pseudo two-address heuristics.
3091  while (SuccSU->Succs.size() == 1 &&
3092  SuccSU->getNode()->isMachineOpcode() &&
3093  SuccSU->getNode()->getMachineOpcode() ==
3094  TargetOpcode::COPY_TO_REGCLASS)
3095  SuccSU = SuccSU->Succs.front().getSUnit();
3096  // Don't constrain non-instruction nodes.
3097  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3098  continue;
3099  // Don't constrain nodes with physical register defs if the
3100  // predecessor can clobber them.
3101  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3102  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3103  continue;
3104  }
3105  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3106  // these may be coalesced away. We want them close to their uses.
3107  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3108  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3109  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3110  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3111  continue;
3112  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3113  (!canClobber(SuccSU, DUSU) ||
3114  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3115  (!SU.isCommutable && SuccSU->isCommutable)) &&
3116  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3117  LLVM_DEBUG(dbgs()
3118  << " Adding a pseudo-two-addr edge from SU #"
3119  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3120  scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3121  }
3122  }
3123  }
3124  }
3125 }
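// A minimal editorial sketch (not part of ScheduleDAGRRList.cpp) of the core
// decision the loop above makes for each competing user of a tied operand:
// add an artificial edge that makes the other user a predecessor of the
// two-address node, so the bottom-up scheduler emits the two-address node
// first, unless that edge would close a cycle. `Topo` is assumed to be a
// ScheduleDAGTopologicalSort built over the same SUnits; all names are
// illustrative only.
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

static void preferTwoAddrUser(SUnit *TwoAddrSU, SUnit *OtherUserSU,
                              ScheduleDAGTopologicalSort &Topo) {
  // Adding OtherUserSU as a predecessor of TwoAddrSU is only safe when
  // OtherUserSU is not already reachable from TwoAddrSU.
  if (!Topo.IsReachable(OtherUserSU, TwoAddrSU)) {
    TwoAddrSU->addPred(SDep(OtherUserSU, SDep::Artificial));
    Topo.AddPredQueued(TwoAddrSU, OtherUserSU); // keep the topological order valid
  }
}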
3126 
3127 //===----------------------------------------------------------------------===//
3128 // Public Constructor Functions
3129 //===----------------------------------------------------------------------===//
3130 
3131 ScheduleDAGSDNodes *
3132 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3133  CodeGenOpt::Level OptLevel) {
3134  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3135  const TargetInstrInfo *TII = STI.getInstrInfo();
3136  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3137 
3138  BURegReductionPriorityQueue *PQ =
3139  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3140  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3141  PQ->setScheduleDAG(SD);
3142  return SD;
3143 }
3144 
3145 ScheduleDAGSDNodes *
3146 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3147  CodeGenOpt::Level OptLevel) {
3148  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3149  const TargetInstrInfo *TII = STI.getInstrInfo();
3150  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3151 
3152  SrcRegReductionPriorityQueue *PQ =
3153  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3154  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3155  PQ->setScheduleDAG(SD);
3156  return SD;
3157 }
3158 
3159 ScheduleDAGSDNodes *
3160 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3161  CodeGenOpt::Level OptLevel) {
3162  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3163  const TargetInstrInfo *TII = STI.getInstrInfo();
3164  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3165  const TargetLowering *TLI = IS->TLI;
3166 
3167  HybridBURRPriorityQueue *PQ =
3168  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3169 
3170  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3171  PQ->setScheduleDAG(SD);
3172  return SD;
3173 }
3174 
3175 ScheduleDAGSDNodes *
3176 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3177  CodeGenOpt::Level OptLevel) {
3178  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3179  const TargetInstrInfo *TII = STI.getInstrInfo();
3180  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3181  const TargetLowering *TLI = IS->TLI;
3182 
3183  ILPBURRPriorityQueue *PQ =
3184  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3185  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3186  PQ->setScheduleDAG(SD);
3187  return SD;
3188 }
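// A minimal editorial sketch (not part of ScheduleDAGRRList.cpp) of how the
// factory functions above are typically consumed: SelectionDAGISel picks one
// of them (normally via the scheduler registry) and runs it over the lowered
// DAG of a basic block. `IS`, `DAG`, and `MBB` are hypothetical objects
// assumed to have been set up by the instruction selector.
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
using namespace llvm;

static void scheduleBlockWithBURR(SelectionDAGISel *IS, SelectionDAG *DAG,
                                  MachineBasicBlock *MBB) {
  ScheduleDAGSDNodes *SD = createBURRListDAGScheduler(IS, CodeGenOpt::Default);
  SD->Run(DAG, MBB); // build SUnits from the DAG and compute the schedule
  delete SD;
}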