LLVM  9.0.0svn
ScheduleDAGRRList.cpp
Go to the documentation of this file.
1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements bottom-up and top-down register pressure reduction list
10 // schedulers, using standard algorithms. The basic approach uses a priority
11 // queue of available nodes to schedule. One at a time, nodes are taken from
12 // the priority queue (thus in priority order), checked for legality to
13 // schedule, and emitted if legal.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "ScheduleDAGSDNodes.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
38 #include "llvm/Config/llvm-config.h"
39 #include "llvm/IR/InlineAsm.h"
40 #include "llvm/MC/MCInstrDesc.h"
41 #include "llvm/MC/MCRegisterInfo.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CodeGen.h"
45 #include "llvm/Support/Compiler.h"
46 #include "llvm/Support/Debug.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <cstdlib>
54 #include <iterator>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 #include <vector>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "pre-RA-sched"
63 
64 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65 STATISTIC(NumUnfolds, "Number of nodes unfolded");
66 STATISTIC(NumDups, "Number of duplicated nodes");
67 STATISTIC(NumPRCopies, "Number of physical register copies");
68 
69 static RegisterScheduler
70  burrListDAGScheduler("list-burr",
71  "Bottom-up register reduction list scheduling",
73 
74 static RegisterScheduler
75  sourceListDAGScheduler("source",
76  "Similar to list-burr but schedules in source "
77  "order when possible",
79 
80 static RegisterScheduler
81  hybridListDAGScheduler("list-hybrid",
82  "Bottom-up register pressure aware list scheduling "
83  "which tries to balance latency and register pressure",
85 
86 static RegisterScheduler
87  ILPListDAGScheduler("list-ilp",
88  "Bottom-up register pressure aware list scheduling "
89  "which tries to balance ILP and register pressure",
91 
93  "disable-sched-cycles", cl::Hidden, cl::init(false),
94  cl::desc("Disable cycle-level precision during preRA scheduling"));
95 
96 // Temporary sched=list-ilp flags until the heuristics are robust.
97 // Some options are also available under sched=list-hybrid.
99  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100  cl::desc("Disable regpressure priority in sched=list-ilp"));
102  "disable-sched-live-uses", cl::Hidden, cl::init(true),
103  cl::desc("Disable live use priority in sched=list-ilp"));
105  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106  cl::desc("Disable virtual register cycle interference checks"));
108  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109  cl::desc("Disable physreg def-use affinity"));
111  "disable-sched-stalls", cl::Hidden, cl::init(true),
112  cl::desc("Disable no-stall priority in sched=list-ilp"));
114  "disable-sched-critical-path", cl::Hidden, cl::init(false),
115  cl::desc("Disable critical path priority in sched=list-ilp"));
117  "disable-sched-height", cl::Hidden, cl::init(false),
118  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120  "disable-2addr-hack", cl::Hidden, cl::init(true),
121  cl::desc("Disable scheduler's two-address hack"));
122 
124  "max-sched-reorder", cl::Hidden, cl::init(6),
125  cl::desc("Number of instructions to allow ahead of the critical path "
126  "in sched=list-ilp"));
127 
129  "sched-avg-ipc", cl::Hidden, cl::init(1),
130  cl::desc("Average inst/cycle whan no target itinerary exists."));
131 
132 namespace {
133 
134 //===----------------------------------------------------------------------===//
135 /// ScheduleDAGRRList - The actual register reduction list scheduler
136 /// implementation. This supports both top-down and bottom-up scheduling.
137 ///
138 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139 private:
140  /// NeedLatency - True if the scheduler will make use of latency information.
141  bool NeedLatency;
142 
143  /// AvailableQueue - The priority queue to use for the available SUnits.
144  SchedulingPriorityQueue *AvailableQueue;
145 
146  /// PendingQueue - This contains all of the instructions whose operands have
147  /// been issued, but their results are not ready yet (due to the latency of
148  /// the operation). Once the operands becomes available, the instruction is
149  /// added to the AvailableQueue.
150  std::vector<SUnit *> PendingQueue;
151 
152  /// HazardRec - The hazard recognizer to use.
153  ScheduleHazardRecognizer *HazardRec;
154 
155  /// CurCycle - The current scheduler state corresponds to this cycle.
156  unsigned CurCycle = 0;
157 
158  /// MinAvailableCycle - Cycle of the soonest available instruction.
159  unsigned MinAvailableCycle;
160 
161  /// IssueCount - Count instructions issued in this cycle
162  /// Currently valid only for bottom-up scheduling.
163  unsigned IssueCount;
164 
165  /// LiveRegDefs - A set of physical registers and their definition
166  /// that are "live". These nodes must be scheduled before any other nodes that
167  /// modifies the registers can be scheduled.
168  unsigned NumLiveRegs;
169  std::unique_ptr<SUnit*[]> LiveRegDefs;
170  std::unique_ptr<SUnit*[]> LiveRegGens;
171 
172  // Collect interferences between physical register use/defs.
173  // Each interference is an SUnit and set of physical registers.
174  SmallVector<SUnit*, 4> Interferences;
175 
177 
178  LRegsMapT LRegsMap;
179 
180  /// Topo - A topological ordering for SUnits which permits fast IsReachable
181  /// and similar queries.
183 
184  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185  // DAG crawling.
186  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187 
188 public:
189  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190  SchedulingPriorityQueue *availqueue,
191  CodeGenOpt::Level OptLevel)
192  : ScheduleDAGSDNodes(mf),
193  NeedLatency(needlatency), AvailableQueue(availqueue),
194  Topo(SUnits, nullptr) {
195  const TargetSubtargetInfo &STI = mf.getSubtarget();
196  if (DisableSchedCycles || !NeedLatency)
197  HazardRec = new ScheduleHazardRecognizer();
198  else
199  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
200  }
201 
202  ~ScheduleDAGRRList() override {
203  delete HazardRec;
204  delete AvailableQueue;
205  }
206 
207  void Schedule() override;
208 
209  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
210 
211  /// IsReachable - Checks if SU is reachable from TargetSU.
212  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
213  return Topo.IsReachable(SU, TargetSU);
214  }
215 
216  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
217  /// create a cycle.
218  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
219  return Topo.WillCreateCycle(SU, TargetSU);
220  }
221 
222  /// AddPred - adds a predecessor edge to SUnit SU.
223  /// This returns true if this is a new predecessor.
224  /// Updates the topological ordering if required.
225  void AddPred(SUnit *SU, const SDep &D) {
226  Topo.AddPred(SU, D.getSUnit());
227  SU->addPred(D);
228  }
229 
230  /// RemovePred - removes a predecessor edge from SUnit SU.
231  /// This returns true if an edge was removed.
232  /// Updates the topological ordering if required.
233  void RemovePred(SUnit *SU, const SDep &D) {
234  Topo.RemovePred(SU, D.getSUnit());
235  SU->removePred(D);
236  }
237 
238 private:
239  bool isReady(SUnit *SU) {
240  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
241  AvailableQueue->isReady(SU);
242  }
243 
244  void ReleasePred(SUnit *SU, const SDep *PredEdge);
245  void ReleasePredecessors(SUnit *SU);
246  void ReleasePending();
247  void AdvanceToCycle(unsigned NextCycle);
248  void AdvancePastStalls(SUnit *SU);
249  void EmitNode(SUnit *SU);
250  void ScheduleNodeBottomUp(SUnit*);
251  void CapturePred(SDep *PredEdge);
252  void UnscheduleNodeBottomUp(SUnit*);
253  void RestoreHazardCheckerBottomUp();
254  void BacktrackBottomUp(SUnit*, SUnit*);
255  SUnit *TryUnfoldSU(SUnit *);
256  SUnit *CopyAndMoveSuccessors(SUnit*);
257  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
258  const TargetRegisterClass*,
259  const TargetRegisterClass*,
261  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
262 
263  void releaseInterferences(unsigned Reg = 0);
264 
265  SUnit *PickNodeToScheduleBottomUp();
266  void ListScheduleBottomUp();
267 
268  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
269  /// Updates the topological ordering if required.
270  SUnit *CreateNewSUnit(SDNode *N) {
271  unsigned NumSUnits = SUnits.size();
272  SUnit *NewNode = newSUnit(N);
273  // Update the topological ordering.
274  if (NewNode->NodeNum >= NumSUnits)
276  return NewNode;
277  }
278 
279  /// CreateClone - Creates a new SUnit from an existing one.
280  /// Updates the topological ordering if required.
281  SUnit *CreateClone(SUnit *N) {
282  unsigned NumSUnits = SUnits.size();
283  SUnit *NewNode = Clone(N);
284  // Update the topological ordering.
285  if (NewNode->NodeNum >= NumSUnits)
287  return NewNode;
288  }
289 
290  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
291  /// need actual latency information but the hybrid scheduler does.
292  bool forceUnitLatencies() const override {
293  return !NeedLatency;
294  }
295 };
296 
297 } // end anonymous namespace
298 
299 /// GetCostForDef - Looks up the register class and cost for a given definition.
300 /// Typically this just means looking up the representative register class,
301 /// but for untyped values (MVT::Untyped) it means inspecting the node's
302 /// opcode to determine what register class is being generated.
303 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
304  const TargetLowering *TLI,
305  const TargetInstrInfo *TII,
306  const TargetRegisterInfo *TRI,
307  unsigned &RegClass, unsigned &Cost,
308  const MachineFunction &MF) {
309  MVT VT = RegDefPos.GetValue();
310 
311  // Special handling for untyped values. These values can only come from
312  // the expansion of custom DAG-to-DAG patterns.
313  if (VT == MVT::Untyped) {
314  const SDNode *Node = RegDefPos.GetNode();
315 
316  // Special handling for CopyFromReg of untyped values.
317  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
318  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
319  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
320  RegClass = RC->getID();
321  Cost = 1;
322  return;
323  }
324 
325  unsigned Opcode = Node->getMachineOpcode();
326  if (Opcode == TargetOpcode::REG_SEQUENCE) {
327  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
328  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
329  RegClass = RC->getID();
330  Cost = 1;
331  return;
332  }
333 
334  unsigned Idx = RegDefPos.GetIdx();
335  const MCInstrDesc Desc = TII->get(Opcode);
336  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
337  RegClass = RC->getID();
338  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
339  // better way to determine it.
340  Cost = 1;
341  } else {
342  RegClass = TLI->getRepRegClassFor(VT)->getID();
343  Cost = TLI->getRepRegClassCostFor(VT);
344  }
345 }
346 
347 /// Schedule - Schedule the DAG using list scheduling.
348 void ScheduleDAGRRList::Schedule() {
349  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
350  << " '" << BB->getName() << "' **********\n");
351 
352  CurCycle = 0;
353  IssueCount = 0;
354  MinAvailableCycle =
356  NumLiveRegs = 0;
357  // Allocate slots for each physical register, plus one for a special register
358  // to track the virtual resource of a calling sequence.
359  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
360  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
361  CallSeqEndForStart.clear();
362  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
363 
364  // Build the scheduling graph.
365  BuildSchedGraph(nullptr);
366 
367  LLVM_DEBUG(dump());
369 
370  AvailableQueue->initNodes(SUnits);
371 
372  HazardRec->Reset();
373 
374  // Execute the actual scheduling loop.
375  ListScheduleBottomUp();
376 
377  AvailableQueue->releaseState();
378 
379  LLVM_DEBUG({
380  dbgs() << "*** Final schedule ***\n";
381  dumpSchedule();
382  dbgs() << '\n';
383  });
384 }
385 
386 //===----------------------------------------------------------------------===//
387 // Bottom-Up Scheduling
388 //===----------------------------------------------------------------------===//
389 
390 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
391 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
392 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
393  SUnit *PredSU = PredEdge->getSUnit();
394 
395 #ifndef NDEBUG
396  if (PredSU->NumSuccsLeft == 0) {
397  dbgs() << "*** Scheduling failed! ***\n";
398  dumpNode(*PredSU);
399  dbgs() << " has been released too many times!\n";
400  llvm_unreachable(nullptr);
401  }
402 #endif
403  --PredSU->NumSuccsLeft;
404 
405  if (!forceUnitLatencies()) {
406  // Updating predecessor's height. This is now the cycle when the
407  // predecessor can be scheduled without causing a pipeline stall.
408  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
409  }
410 
411  // If all the node's successors are scheduled, this node is ready
412  // to be scheduled. Ignore the special EntrySU node.
413  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
414  PredSU->isAvailable = true;
415 
416  unsigned Height = PredSU->getHeight();
417  if (Height < MinAvailableCycle)
418  MinAvailableCycle = Height;
419 
420  if (isReady(PredSU)) {
421  AvailableQueue->push(PredSU);
422  }
423  // CapturePred and others may have left the node in the pending queue, avoid
424  // adding it twice.
425  else if (!PredSU->isPending) {
426  PredSU->isPending = true;
427  PendingQueue.push_back(PredSU);
428  }
429  }
430 }
431 
432 /// IsChainDependent - Test if Outer is reachable from Inner through
433 /// chain dependencies.
434 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
435  unsigned NestLevel,
436  const TargetInstrInfo *TII) {
437  SDNode *N = Outer;
438  while (true) {
439  if (N == Inner)
440  return true;
441  // For a TokenFactor, examine each operand. There may be multiple ways
442  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
443  // most nesting in order to ensure that we find the corresponding match.
444  if (N->getOpcode() == ISD::TokenFactor) {
445  for (const SDValue &Op : N->op_values())
446  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
447  return true;
448  return false;
449  }
450  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
451  if (N->isMachineOpcode()) {
452  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
453  ++NestLevel;
454  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
455  if (NestLevel == 0)
456  return false;
457  --NestLevel;
458  }
459  }
460  // Otherwise, find the chain and continue climbing.
461  for (const SDValue &Op : N->op_values())
462  if (Op.getValueType() == MVT::Other) {
463  N = Op.getNode();
464  goto found_chain_operand;
465  }
466  return false;
467  found_chain_operand:;
468  if (N->getOpcode() == ISD::EntryToken)
469  return false;
470  }
471 }
472 
473 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
474 /// the corresponding (lowered) CALLSEQ_BEGIN node.
475 ///
476 /// NestLevel and MaxNested are used in recursion to indcate the current level
477 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
478 /// level seen so far.
479 ///
480 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
481 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
482 static SDNode *
483 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
484  const TargetInstrInfo *TII) {
485  while (true) {
486  // For a TokenFactor, examine each operand. There may be multiple ways
487  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
488  // most nesting in order to ensure that we find the corresponding match.
489  if (N->getOpcode() == ISD::TokenFactor) {
490  SDNode *Best = nullptr;
491  unsigned BestMaxNest = MaxNest;
492  for (const SDValue &Op : N->op_values()) {
493  unsigned MyNestLevel = NestLevel;
494  unsigned MyMaxNest = MaxNest;
495  if (SDNode *New = FindCallSeqStart(Op.getNode(),
496  MyNestLevel, MyMaxNest, TII))
497  if (!Best || (MyMaxNest > BestMaxNest)) {
498  Best = New;
499  BestMaxNest = MyMaxNest;
500  }
501  }
502  assert(Best);
503  MaxNest = BestMaxNest;
504  return Best;
505  }
506  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
507  if (N->isMachineOpcode()) {
508  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
509  ++NestLevel;
510  MaxNest = std::max(MaxNest, NestLevel);
511  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
512  assert(NestLevel != 0);
513  --NestLevel;
514  if (NestLevel == 0)
515  return N;
516  }
517  }
518  // Otherwise, find the chain and continue climbing.
519  for (const SDValue &Op : N->op_values())
520  if (Op.getValueType() == MVT::Other) {
521  N = Op.getNode();
522  goto found_chain_operand;
523  }
524  return nullptr;
525  found_chain_operand:;
526  if (N->getOpcode() == ISD::EntryToken)
527  return nullptr;
528  }
529 }
530 
531 /// Call ReleasePred for each predecessor, then update register live def/gen.
532 /// Always update LiveRegDefs for a register dependence even if the current SU
533 /// also defines the register. This effectively create one large live range
534 /// across a sequence of two-address node. This is important because the
535 /// entire chain must be scheduled together. Example:
536 ///
537 /// flags = (3) add
538 /// flags = (2) addc flags
539 /// flags = (1) addc flags
540 ///
541 /// results in
542 ///
543 /// LiveRegDefs[flags] = 3
544 /// LiveRegGens[flags] = 1
545 ///
546 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
547 /// interference on flags.
548 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
549  // Bottom up: release predecessors
550  for (SDep &Pred : SU->Preds) {
551  ReleasePred(SU, &Pred);
552  if (Pred.isAssignedRegDep()) {
553  // This is a physical register dependency and it's impossible or
554  // expensive to copy the register. Make sure nothing that can
555  // clobber the register is scheduled between the predecessor and
556  // this node.
557  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
558  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
559  "interference on register dependence");
560  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
561  if (!LiveRegGens[Pred.getReg()]) {
562  ++NumLiveRegs;
563  LiveRegGens[Pred.getReg()] = SU;
564  }
565  }
566  }
567 
568  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
569  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
570  // these nodes, to prevent other calls from being interscheduled with them.
571  unsigned CallResource = TRI->getNumRegs();
572  if (!LiveRegDefs[CallResource])
573  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
574  if (Node->isMachineOpcode() &&
575  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
576  unsigned NestLevel = 0;
577  unsigned MaxNest = 0;
578  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
579  assert(N && "Must find call sequence start");
580 
581  SUnit *Def = &SUnits[N->getNodeId()];
582  CallSeqEndForStart[Def] = SU;
583 
584  ++NumLiveRegs;
585  LiveRegDefs[CallResource] = Def;
586  LiveRegGens[CallResource] = SU;
587  break;
588  }
589 }
590 
591 /// Check to see if any of the pending instructions are ready to issue. If
592 /// so, add them to the available queue.
593 void ScheduleDAGRRList::ReleasePending() {
594  if (DisableSchedCycles) {
595  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
596  return;
597  }
598 
599  // If the available queue is empty, it is safe to reset MinAvailableCycle.
600  if (AvailableQueue->empty())
601  MinAvailableCycle = std::numeric_limits<unsigned>::max();
602 
603  // Check to see if any of the pending instructions are ready to issue. If
604  // so, add them to the available queue.
605  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
606  unsigned ReadyCycle = PendingQueue[i]->getHeight();
607  if (ReadyCycle < MinAvailableCycle)
608  MinAvailableCycle = ReadyCycle;
609 
610  if (PendingQueue[i]->isAvailable) {
611  if (!isReady(PendingQueue[i]))
612  continue;
613  AvailableQueue->push(PendingQueue[i]);
614  }
615  PendingQueue[i]->isPending = false;
616  PendingQueue[i] = PendingQueue.back();
617  PendingQueue.pop_back();
618  --i; --e;
619  }
620 }
621 
622 /// Move the scheduler state forward by the specified number of Cycles.
623 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
624  if (NextCycle <= CurCycle)
625  return;
626 
627  IssueCount = 0;
628  AvailableQueue->setCurCycle(NextCycle);
629  if (!HazardRec->isEnabled()) {
630  // Bypass lots of virtual calls in case of long latency.
631  CurCycle = NextCycle;
632  }
633  else {
634  for (; CurCycle != NextCycle; ++CurCycle) {
635  HazardRec->RecedeCycle();
636  }
637  }
638  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
639  // available Q to release pending nodes at least once before popping.
640  ReleasePending();
641 }
642 
643 /// Move the scheduler state forward until the specified node's dependents are
644 /// ready and can be scheduled with no resource conflicts.
645 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
646  if (DisableSchedCycles)
647  return;
648 
649  // FIXME: Nodes such as CopyFromReg probably should not advance the current
650  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
651  // has predecessors the cycle will be advanced when they are scheduled.
652  // But given the crude nature of modeling latency though such nodes, we
653  // currently need to treat these nodes like real instructions.
654  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
655 
656  unsigned ReadyCycle = SU->getHeight();
657 
658  // Bump CurCycle to account for latency. We assume the latency of other
659  // available instructions may be hidden by the stall (not a full pipe stall).
660  // This updates the hazard recognizer's cycle before reserving resources for
661  // this instruction.
662  AdvanceToCycle(ReadyCycle);
663 
664  // Calls are scheduled in their preceding cycle, so don't conflict with
665  // hazards from instructions after the call. EmitNode will reset the
666  // scoreboard state before emitting the call.
667  if (SU->isCall)
668  return;
669 
670  // FIXME: For resource conflicts in very long non-pipelined stages, we
671  // should probably skip ahead here to avoid useless scoreboard checks.
672  int Stalls = 0;
673  while (true) {
675  HazardRec->getHazardType(SU, -Stalls);
676 
678  break;
679 
680  ++Stalls;
681  }
682  AdvanceToCycle(CurCycle + Stalls);
683 }
684 
685 /// Record this SUnit in the HazardRecognizer.
686 /// Does not update CurCycle.
687 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
688  if (!HazardRec->isEnabled())
689  return;
690 
691  // Check for phys reg copy.
692  if (!SU->getNode())
693  return;
694 
695  switch (SU->getNode()->getOpcode()) {
696  default:
697  assert(SU->getNode()->isMachineOpcode() &&
698  "This target-independent node should not be scheduled.");
699  break;
700  case ISD::MERGE_VALUES:
701  case ISD::TokenFactor:
702  case ISD::LIFETIME_START:
703  case ISD::LIFETIME_END:
704  case ISD::CopyToReg:
705  case ISD::CopyFromReg:
706  case ISD::EH_LABEL:
707  // Noops don't affect the scoreboard state. Copies are likely to be
708  // removed.
709  return;
710  case ISD::INLINEASM:
711  case ISD::INLINEASM_BR:
712  // For inline asm, clear the pipeline state.
713  HazardRec->Reset();
714  return;
715  }
716  if (SU->isCall) {
717  // Calls are scheduled with their preceding instructions. For bottom-up
718  // scheduling, clear the pipeline state before emitting.
719  HazardRec->Reset();
720  }
721 
722  HazardRec->EmitInstruction(SU);
723 }
724 
725 static void resetVRegCycle(SUnit *SU);
726 
727 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
728 /// count of its predecessors. If a predecessor pending count is zero, add it to
729 /// the Available queue.
730 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
731  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
732  LLVM_DEBUG(dumpNode(*SU));
733 
734 #ifndef NDEBUG
735  if (CurCycle < SU->getHeight())
736  LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
737  << "] pipeline stall!\n");
738 #endif
739 
740  // FIXME: Do not modify node height. It may interfere with
741  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
742  // node its ready cycle can aid heuristics, and after scheduling it can
743  // indicate the scheduled cycle.
744  SU->setHeightToAtLeast(CurCycle);
745 
746  // Reserve resources for the scheduled instruction.
747  EmitNode(SU);
748 
749  Sequence.push_back(SU);
750 
751  AvailableQueue->scheduledNode(SU);
752 
753  // If HazardRec is disabled, and each inst counts as one cycle, then
754  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
755  // PendingQueue for schedulers that implement HasReadyFilter.
756  if (!HazardRec->isEnabled() && AvgIPC < 2)
757  AdvanceToCycle(CurCycle + 1);
758 
759  // Update liveness of predecessors before successors to avoid treating a
760  // two-address node as a live range def.
761  ReleasePredecessors(SU);
762 
763  // Release all the implicit physical register defs that are live.
764  for (SDep &Succ : SU->Succs) {
765  // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
766  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
767  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
768  --NumLiveRegs;
769  LiveRegDefs[Succ.getReg()] = nullptr;
770  LiveRegGens[Succ.getReg()] = nullptr;
771  releaseInterferences(Succ.getReg());
772  }
773  }
774  // Release the special call resource dependence, if this is the beginning
775  // of a call.
776  unsigned CallResource = TRI->getNumRegs();
777  if (LiveRegDefs[CallResource] == SU)
778  for (const SDNode *SUNode = SU->getNode(); SUNode;
779  SUNode = SUNode->getGluedNode()) {
780  if (SUNode->isMachineOpcode() &&
781  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
782  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
783  --NumLiveRegs;
784  LiveRegDefs[CallResource] = nullptr;
785  LiveRegGens[CallResource] = nullptr;
786  releaseInterferences(CallResource);
787  }
788  }
789 
790  resetVRegCycle(SU);
791 
792  SU->isScheduled = true;
793 
794  // Conditions under which the scheduler should eagerly advance the cycle:
795  // (1) No available instructions
796  // (2) All pipelines full, so available instructions must have hazards.
797  //
798  // If HazardRec is disabled, the cycle was pre-advanced before calling
799  // ReleasePredecessors. In that case, IssueCount should remain 0.
800  //
801  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
802  if (HazardRec->isEnabled() || AvgIPC > 1) {
803  if (SU->getNode() && SU->getNode()->isMachineOpcode())
804  ++IssueCount;
805  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
806  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
807  AdvanceToCycle(CurCycle + 1);
808  }
809 }
810 
811 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
812 /// unscheduled, increase the succ left count of its predecessors. Remove
813 /// them from AvailableQueue if necessary.
814 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
815  SUnit *PredSU = PredEdge->getSUnit();
816  if (PredSU->isAvailable) {
817  PredSU->isAvailable = false;
818  if (!PredSU->isPending)
819  AvailableQueue->remove(PredSU);
820  }
821 
823  "NumSuccsLeft will overflow!");
824  ++PredSU->NumSuccsLeft;
825 }
826 
827 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
828 /// its predecessor states to reflect the change.
829 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
830  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
831  LLVM_DEBUG(dumpNode(*SU));
832 
833  for (SDep &Pred : SU->Preds) {
834  CapturePred(&Pred);
835  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
836  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
837  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
838  "Physical register dependency violated?");
839  --NumLiveRegs;
840  LiveRegDefs[Pred.getReg()] = nullptr;
841  LiveRegGens[Pred.getReg()] = nullptr;
842  releaseInterferences(Pred.getReg());
843  }
844  }
845 
846  // Reclaim the special call resource dependence, if this is the beginning
847  // of a call.
848  unsigned CallResource = TRI->getNumRegs();
849  for (const SDNode *SUNode = SU->getNode(); SUNode;
850  SUNode = SUNode->getGluedNode()) {
851  if (SUNode->isMachineOpcode() &&
852  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
853  SUnit *SeqEnd = CallSeqEndForStart[SU];
854  assert(SeqEnd && "Call sequence start/end must be known");
855  assert(!LiveRegDefs[CallResource]);
856  assert(!LiveRegGens[CallResource]);
857  ++NumLiveRegs;
858  LiveRegDefs[CallResource] = SU;
859  LiveRegGens[CallResource] = SeqEnd;
860  }
861  }
862 
863  // Release the special call resource dependence, if this is the end
864  // of a call.
865  if (LiveRegGens[CallResource] == SU)
866  for (const SDNode *SUNode = SU->getNode(); SUNode;
867  SUNode = SUNode->getGluedNode()) {
868  if (SUNode->isMachineOpcode() &&
869  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
870  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
871  assert(LiveRegDefs[CallResource]);
872  assert(LiveRegGens[CallResource]);
873  --NumLiveRegs;
874  LiveRegDefs[CallResource] = nullptr;
875  LiveRegGens[CallResource] = nullptr;
876  releaseInterferences(CallResource);
877  }
878  }
879 
880  for (auto &Succ : SU->Succs) {
881  if (Succ.isAssignedRegDep()) {
882  auto Reg = Succ.getReg();
883  if (!LiveRegDefs[Reg])
884  ++NumLiveRegs;
885  // This becomes the nearest def. Note that an earlier def may still be
886  // pending if this is a two-address node.
887  LiveRegDefs[Reg] = SU;
888 
889  // Update LiveRegGen only if was empty before this unscheduling.
890  // This is to avoid incorrect updating LiveRegGen set in previous run.
891  if (!LiveRegGens[Reg]) {
892  // Find the successor with the lowest height.
893  LiveRegGens[Reg] = Succ.getSUnit();
894  for (auto &Succ2 : SU->Succs) {
895  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
896  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
897  LiveRegGens[Reg] = Succ2.getSUnit();
898  }
899  }
900  }
901  }
902  if (SU->getHeight() < MinAvailableCycle)
903  MinAvailableCycle = SU->getHeight();
904 
905  SU->setHeightDirty();
906  SU->isScheduled = false;
907  SU->isAvailable = true;
908  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
909  // Don't make available until backtracking is complete.
910  SU->isPending = true;
911  PendingQueue.push_back(SU);
912  }
913  else {
914  AvailableQueue->push(SU);
915  }
916  AvailableQueue->unscheduledNode(SU);
917 }
918 
919 /// After backtracking, the hazard checker needs to be restored to a state
920 /// corresponding the current cycle.
921 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
922  HazardRec->Reset();
923 
924  unsigned LookAhead = std::min((unsigned)Sequence.size(),
925  HazardRec->getMaxLookAhead());
926  if (LookAhead == 0)
927  return;
928 
929  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
930  unsigned HazardCycle = (*I)->getHeight();
931  for (auto E = Sequence.end(); I != E; ++I) {
932  SUnit *SU = *I;
933  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
934  HazardRec->RecedeCycle();
935  }
936  EmitNode(SU);
937  }
938 }
939 
940 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
941 /// BTCycle in order to schedule a specific node.
942 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
943  SUnit *OldSU = Sequence.back();
944  while (true) {
945  Sequence.pop_back();
946  // FIXME: use ready cycle instead of height
947  CurCycle = OldSU->getHeight();
948  UnscheduleNodeBottomUp(OldSU);
949  AvailableQueue->setCurCycle(CurCycle);
950  if (OldSU == BtSU)
951  break;
952  OldSU = Sequence.back();
953  }
954 
955  assert(!SU->isSucc(OldSU) && "Something is wrong!");
956 
957  RestoreHazardCheckerBottomUp();
958 
959  ReleasePending();
960 
961  ++NumBacktracks;
962 }
963 
964 static bool isOperandOf(const SUnit *SU, SDNode *N) {
965  for (const SDNode *SUNode = SU->getNode(); SUNode;
966  SUNode = SUNode->getGluedNode()) {
967  if (SUNode->isOperandOf(N))
968  return true;
969  }
970  return false;
971 }
972 
973 /// TryUnfold - Attempt to unfold
974 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
975  SDNode *N = SU->getNode();
976  // Use while over if to ease fall through.
977  SmallVector<SDNode *, 2> NewNodes;
978  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
979  return nullptr;
980 
981  // unfolding an x86 DEC64m operation results in store, dec, load which
982  // can't be handled here so quit
983  if (NewNodes.size() == 3)
984  return nullptr;
985 
986  assert(NewNodes.size() == 2 && "Expected a load folding node!");
987 
988  N = NewNodes[1];
989  SDNode *LoadNode = NewNodes[0];
990  unsigned NumVals = N->getNumValues();
991  unsigned OldNumVals = SU->getNode()->getNumValues();
992 
993  // LoadNode may already exist. This can happen when there is another
994  // load from the same location and producing the same type of value
995  // but it has different alignment or volatileness.
996  bool isNewLoad = true;
997  SUnit *LoadSU;
998  if (LoadNode->getNodeId() != -1) {
999  LoadSU = &SUnits[LoadNode->getNodeId()];
1000  // If LoadSU has already been scheduled, we should clone it but
1001  // this would negate the benefit to unfolding so just return SU.
1002  if (LoadSU->isScheduled)
1003  return SU;
1004  isNewLoad = false;
1005  } else {
1006  LoadSU = CreateNewSUnit(LoadNode);
1007  LoadNode->setNodeId(LoadSU->NodeNum);
1008 
1009  InitNumRegDefsLeft(LoadSU);
1010  computeLatency(LoadSU);
1011  }
1012 
1013  bool isNewN = true;
1014  SUnit *NewSU;
1015  // This can only happen when isNewLoad is false.
1016  if (N->getNodeId() != -1) {
1017  NewSU = &SUnits[N->getNodeId()];
1018  // If NewSU has already been scheduled, we need to clone it, but this
1019  // negates the benefit to unfolding so just return SU.
1020  if (NewSU->isScheduled)
1021  return SU;
1022  isNewN = false;
1023  } else {
1024  NewSU = CreateNewSUnit(N);
1025  N->setNodeId(NewSU->NodeNum);
1026 
1027  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1028  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1029  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1030  NewSU->isTwoAddress = true;
1031  break;
1032  }
1033  }
1034  if (MCID.isCommutable())
1035  NewSU->isCommutable = true;
1036 
1037  InitNumRegDefsLeft(NewSU);
1038  computeLatency(NewSU);
1039  }
1040 
1041  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1042 
1043  // Now that we are committed to unfolding replace DAG Uses.
1044  for (unsigned i = 0; i != NumVals; ++i)
1045  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1046  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1047  SDValue(LoadNode, 1));
1048 
1049  // Record all the edges to and from the old SU, by category.
1050  SmallVector<SDep, 4> ChainPreds;
1051  SmallVector<SDep, 4> ChainSuccs;
1052  SmallVector<SDep, 4> LoadPreds;
1053  SmallVector<SDep, 4> NodePreds;
1054  SmallVector<SDep, 4> NodeSuccs;
1055  for (SDep &Pred : SU->Preds) {
1056  if (Pred.isCtrl())
1057  ChainPreds.push_back(Pred);
1058  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1059  LoadPreds.push_back(Pred);
1060  else
1061  NodePreds.push_back(Pred);
1062  }
1063  for (SDep &Succ : SU->Succs) {
1064  if (Succ.isCtrl())
1065  ChainSuccs.push_back(Succ);
1066  else
1067  NodeSuccs.push_back(Succ);
1068  }
1069 
1070  // Now assign edges to the newly-created nodes.
1071  for (const SDep &Pred : ChainPreds) {
1072  RemovePred(SU, Pred);
1073  if (isNewLoad)
1074  AddPred(LoadSU, Pred);
1075  }
1076  for (const SDep &Pred : LoadPreds) {
1077  RemovePred(SU, Pred);
1078  if (isNewLoad)
1079  AddPred(LoadSU, Pred);
1080  }
1081  for (const SDep &Pred : NodePreds) {
1082  RemovePred(SU, Pred);
1083  AddPred(NewSU, Pred);
1084  }
1085  for (SDep D : NodeSuccs) {
1086  SUnit *SuccDep = D.getSUnit();
1087  D.setSUnit(SU);
1088  RemovePred(SuccDep, D);
1089  D.setSUnit(NewSU);
1090  AddPred(SuccDep, D);
1091  // Balance register pressure.
1092  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1093  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1094  --NewSU->NumRegDefsLeft;
1095  }
1096  for (SDep D : ChainSuccs) {
1097  SUnit *SuccDep = D.getSUnit();
1098  D.setSUnit(SU);
1099  RemovePred(SuccDep, D);
1100  if (isNewLoad) {
1101  D.setSUnit(LoadSU);
1102  AddPred(SuccDep, D);
1103  }
1104  }
1105 
1106  // Add a data dependency to reflect that NewSU reads the value defined
1107  // by LoadSU.
1108  SDep D(LoadSU, SDep::Data, 0);
1109  D.setLatency(LoadSU->Latency);
1110  AddPred(NewSU, D);
1111 
1112  if (isNewLoad)
1113  AvailableQueue->addNode(LoadSU);
1114  if (isNewN)
1115  AvailableQueue->addNode(NewSU);
1116 
1117  ++NumUnfolds;
1118 
1119  if (NewSU->NumSuccsLeft == 0)
1120  NewSU->isAvailable = true;
1121 
1122  return NewSU;
1123 }
1124 
1125 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1126 /// successors to the newly created node.
1127 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1128  SDNode *N = SU->getNode();
1129  if (!N)
1130  return nullptr;
1131 
1132  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1133  LLVM_DEBUG(dumpNode(*SU));
1134 
1135  if (N->getGluedNode() &&
1136  !TII->canCopyGluedNodeDuringSchedule(N)) {
1137  LLVM_DEBUG(
1138  dbgs()
1139  << "Giving up because it has incoming glue and the target does not "
1140  "want to copy it\n");
1141  return nullptr;
1142  }
1143 
1144  SUnit *NewSU;
1145  bool TryUnfold = false;
1146  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1147  MVT VT = N->getSimpleValueType(i);
1148  if (VT == MVT::Glue) {
1149  LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1150  return nullptr;
1151  } else if (VT == MVT::Other)
1152  TryUnfold = true;
1153  }
1154  for (const SDValue &Op : N->op_values()) {
1155  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1156  if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1157  LLVM_DEBUG(
1158  dbgs() << "Giving up because it one of the operands is glue and "
1159  "the target does not want to copy it\n");
1160  return nullptr;
1161  }
1162  }
1163 
1164  // If possible unfold instruction.
1165  if (TryUnfold) {
1166  SUnit *UnfoldSU = TryUnfoldSU(SU);
1167  if (!UnfoldSU)
1168  return nullptr;
1169  SU = UnfoldSU;
1170  N = SU->getNode();
1171  // If this can be scheduled don't bother duplicating and just return
1172  if (SU->NumSuccsLeft == 0)
1173  return SU;
1174  }
1175 
1176  LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1177  NewSU = CreateClone(SU);
1178 
1179  // New SUnit has the exact same predecessors.
1180  for (SDep &Pred : SU->Preds)
1181  if (!Pred.isArtificial())
1182  AddPred(NewSU, Pred);
1183 
1184  // Only copy scheduled successors. Cut them from old node's successor
1185  // list and move them over.
1187  for (SDep &Succ : SU->Succs) {
1188  if (Succ.isArtificial())
1189  continue;
1190  SUnit *SuccSU = Succ.getSUnit();
1191  if (SuccSU->isScheduled) {
1192  SDep D = Succ;
1193  D.setSUnit(NewSU);
1194  AddPred(SuccSU, D);
1195  D.setSUnit(SU);
1196  DelDeps.push_back(std::make_pair(SuccSU, D));
1197  }
1198  }
1199  for (auto &DelDep : DelDeps)
1200  RemovePred(DelDep.first, DelDep.second);
1201 
1202  AvailableQueue->updateNode(SU);
1203  AvailableQueue->addNode(NewSU);
1204 
1205  ++NumDups;
1206  return NewSU;
1207 }
1208 
1209 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1210 /// scheduled successors of the given SUnit to the last copy.
1211 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1212  const TargetRegisterClass *DestRC,
1213  const TargetRegisterClass *SrcRC,
1215  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1216  CopyFromSU->CopySrcRC = SrcRC;
1217  CopyFromSU->CopyDstRC = DestRC;
1218 
1219  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1220  CopyToSU->CopySrcRC = DestRC;
1221  CopyToSU->CopyDstRC = SrcRC;
1222 
1223  // Only copy scheduled successors. Cut them from old node's successor
1224  // list and move them over.
1226  for (SDep &Succ : SU->Succs) {
1227  if (Succ.isArtificial())
1228  continue;
1229  SUnit *SuccSU = Succ.getSUnit();
1230  if (SuccSU->isScheduled) {
1231  SDep D = Succ;
1232  D.setSUnit(CopyToSU);
1233  AddPred(SuccSU, D);
1234  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1235  }
1236  else {
1237  // Avoid scheduling the def-side copy before other successors. Otherwise
1238  // we could introduce another physreg interference on the copy and
1239  // continue inserting copies indefinitely.
1240  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1241  }
1242  }
1243  for (auto &DelDep : DelDeps)
1244  RemovePred(DelDep.first, DelDep.second);
1245 
1246  SDep FromDep(SU, SDep::Data, Reg);
1247  FromDep.setLatency(SU->Latency);
1248  AddPred(CopyFromSU, FromDep);
1249  SDep ToDep(CopyFromSU, SDep::Data, 0);
1250  ToDep.setLatency(CopyFromSU->Latency);
1251  AddPred(CopyToSU, ToDep);
1252 
1253  AvailableQueue->updateNode(SU);
1254  AvailableQueue->addNode(CopyFromSU);
1255  AvailableQueue->addNode(CopyToSU);
1256  Copies.push_back(CopyFromSU);
1257  Copies.push_back(CopyToSU);
1258 
1259  ++NumPRCopies;
1260 }
1261 
1262 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1263 /// definition of the specified node.
1264 /// FIXME: Move to SelectionDAG?
1265 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1266  const TargetInstrInfo *TII) {
1267  unsigned NumRes;
1268  if (N->getOpcode() == ISD::CopyFromReg) {
1269  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1270  NumRes = 1;
1271  } else {
1272  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1273  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1274  NumRes = MCID.getNumDefs();
1275  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1276  if (Reg == *ImpDef)
1277  break;
1278  ++NumRes;
1279  }
1280  }
1281  return N->getSimpleValueType(NumRes);
1282 }
1283 
1284 /// CheckForLiveRegDef - Return true and update live register vector if the
1285 /// specified register def of the specified SUnit clobbers any "live" registers.
1286 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1287  SUnit **LiveRegDefs,
1288  SmallSet<unsigned, 4> &RegAdded,
1290  const TargetRegisterInfo *TRI) {
1291  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1292 
1293  // Check if Ref is live.
1294  if (!LiveRegDefs[*AliasI]) continue;
1295 
1296  // Allow multiple uses of the same def.
1297  if (LiveRegDefs[*AliasI] == SU) continue;
1298 
1299  // Add Reg to the set of interfering live regs.
1300  if (RegAdded.insert(*AliasI).second) {
1301  LRegs.push_back(*AliasI);
1302  }
1303  }
1304 }
1305 
1306 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1307 /// by RegMask, and add them to LRegs.
1308 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1309  ArrayRef<SUnit*> LiveRegDefs,
1310  SmallSet<unsigned, 4> &RegAdded,
1311  SmallVectorImpl<unsigned> &LRegs) {
1312  // Look at all live registers. Skip Reg0 and the special CallResource.
1313  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1314  if (!LiveRegDefs[i]) continue;
1315  if (LiveRegDefs[i] == SU) continue;
1316  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1317  if (RegAdded.insert(i).second)
1318  LRegs.push_back(i);
1319  }
1320 }
1321 
1322 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1323 static const uint32_t *getNodeRegMask(const SDNode *N) {
1324  for (const SDValue &Op : N->op_values())
1325  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1326  return RegOp->getRegMask();
1327  return nullptr;
1328 }
1329 
1330 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1331 /// scheduling of the given node to satisfy live physical register dependencies.
1332 /// If the specific node is the last one that's available to schedule, do
1333 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1334 bool ScheduleDAGRRList::
1335 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1336  if (NumLiveRegs == 0)
1337  return false;
1338 
1339  SmallSet<unsigned, 4> RegAdded;
1340  // If this node would clobber any "live" register, then it's not ready.
1341  //
1342  // If SU is the currently live definition of the same register that it uses,
1343  // then we are free to schedule it.
1344  for (SDep &Pred : SU->Preds) {
1345  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1346  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1347  RegAdded, LRegs, TRI);
1348  }
1349 
1350  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1351  if (Node->getOpcode() == ISD::INLINEASM ||
1352  Node->getOpcode() == ISD::INLINEASM_BR) {
1353  // Inline asm can clobber physical defs.
1354  unsigned NumOps = Node->getNumOperands();
1355  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1356  --NumOps; // Ignore the glue operand.
1357 
1358  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1359  unsigned Flags =
1360  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1361  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1362 
1363  ++i; // Skip the ID value.
1364  if (InlineAsm::isRegDefKind(Flags) ||
1366  InlineAsm::isClobberKind(Flags)) {
1367  // Check for def of register or earlyclobber register.
1368  for (; NumVals; --NumVals, ++i) {
1369  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1371  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1372  }
1373  } else
1374  i += NumVals;
1375  }
1376  continue;
1377  }
1378 
1379  if (!Node->isMachineOpcode())
1380  continue;
1381  // If we're in the middle of scheduling a call, don't begin scheduling
1382  // another call. Also, don't allow any physical registers to be live across
1383  // the call.
1384  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1385  // Check the special calling-sequence resource.
1386  unsigned CallResource = TRI->getNumRegs();
1387  if (LiveRegDefs[CallResource]) {
1388  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1389  while (SDNode *Glued = Gen->getGluedNode())
1390  Gen = Glued;
1391  if (!IsChainDependent(Gen, Node, 0, TII) &&
1392  RegAdded.insert(CallResource).second)
1393  LRegs.push_back(CallResource);
1394  }
1395  }
1396  if (const uint32_t *RegMask = getNodeRegMask(Node))
1397  CheckForLiveRegDefMasked(SU, RegMask,
1398  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1399  RegAdded, LRegs);
1400 
1401  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1402  if (MCID.hasOptionalDef()) {
1403  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1404  // This operand can be either a def of CPSR, if the S bit is set; or a use
1405  // of %noreg. When the OptionalDef is set to a valid register, we need to
1406  // handle it in the same way as an ImplicitDef.
1407  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1408  if (MCID.OpInfo[i].isOptionalDef()) {
1409  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1410  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1411  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1412  }
1413  }
1414  if (!MCID.ImplicitDefs)
1415  continue;
1416  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1417  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1418  }
1419 
1420  return !LRegs.empty();
1421 }
1422 
1423 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1424  // Add the nodes that aren't ready back onto the available list.
1425  for (unsigned i = Interferences.size(); i > 0; --i) {
1426  SUnit *SU = Interferences[i-1];
1427  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1428  if (Reg) {
1429  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1430  if (!is_contained(LRegs, Reg))
1431  continue;
1432  }
1433  SU->isPending = false;
1434  // The interfering node may no longer be available due to backtracking.
1435  // Furthermore, it may have been made available again, in which case it is
1436  // now already in the AvailableQueue.
1437  if (SU->isAvailable && !SU->NodeQueueId) {
1438  LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1439  AvailableQueue->push(SU);
1440  }
1441  if (i < Interferences.size())
1442  Interferences[i-1] = Interferences.back();
1443  Interferences.pop_back();
1444  LRegsMap.erase(LRegsPos);
1445  }
1446 }
1447 
1448 /// Return a node that can be scheduled in this cycle. Requirements:
1449 /// (1) Ready: latency has been satisfied
1450 /// (2) No Hazards: resources are available
1451 /// (3) No Interferences: may unschedule to break register interferences.
1452 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1453  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1454  auto FindAvailableNode = [&]() {
1455  while (CurSU) {
1457  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1458  break;
1459  LLVM_DEBUG(dbgs() << " Interfering reg ";
1460  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1461  else dbgs() << printReg(LRegs[0], TRI);
1462  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1463  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1464  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1465  if (LRegsPair.second) {
1466  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1467  Interferences.push_back(CurSU);
1468  }
1469  else {
1470  assert(CurSU->isPending && "Interferences are pending");
1471  // Update the interference with current live regs.
1472  LRegsPair.first->second = LRegs;
1473  }
1474  CurSU = AvailableQueue->pop();
1475  }
1476  };
1477  FindAvailableNode();
1478  if (CurSU)
1479  return CurSU;
1480 
1481  // All candidates are delayed due to live physical reg dependencies.
1482  // Try backtracking, code duplication, or inserting cross class copies
1483  // to resolve it.
1484  for (SUnit *TrySU : Interferences) {
1485  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1486 
1487  // Try unscheduling up to the point where it's safe to schedule
1488  // this node.
1489  SUnit *BtSU = nullptr;
1490  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1491  for (unsigned Reg : LRegs) {
1492  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1493  BtSU = LiveRegGens[Reg];
1494  LiveCycle = BtSU->getHeight();
1495  }
1496  }
1497  if (!WillCreateCycle(TrySU, BtSU)) {
1498  // BacktrackBottomUp mutates Interferences!
1499  BacktrackBottomUp(TrySU, BtSU);
1500 
1501  // Force the current node to be scheduled before the node that
1502  // requires the physical reg dep.
1503  if (BtSU->isAvailable) {
1504  BtSU->isAvailable = false;
1505  if (!BtSU->isPending)
1506  AvailableQueue->remove(BtSU);
1507  }
1508  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1509  << ") to SU(" << TrySU->NodeNum << ")\n");
1510  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1511 
1512  // If one or more successors has been unscheduled, then the current
1513  // node is no longer available.
1514  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1515  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1516  CurSU = AvailableQueue->pop();
1517  } else {
1518  LLVM_DEBUG(dbgs() << "TrySU available\n");
1519  // Available and in AvailableQueue
1520  AvailableQueue->remove(TrySU);
1521  CurSU = TrySU;
1522  }
1523  FindAvailableNode();
1524  // Interferences has been mutated. We must break.
1525  break;
1526  }
1527  }
1528 
1529  if (!CurSU) {
1530  // Can't backtrack. If it's too expensive to copy the value, then try
1531  // duplicate the nodes that produces these "too expensive to copy"
1532  // values to break the dependency. In case even that doesn't work,
1533  // insert cross class copies.
1534  // If it's not too expensive, i.e. cost != -1, issue copies.
1535  SUnit *TrySU = Interferences[0];
1536  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1537  assert(LRegs.size() == 1 && "Can't handle this yet!");
1538  unsigned Reg = LRegs[0];
1539  SUnit *LRDef = LiveRegDefs[Reg];
1540  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1541  const TargetRegisterClass *RC =
1542  TRI->getMinimalPhysRegClass(Reg, VT);
1543  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1544 
1545  // If cross copy register class is the same as RC, then it must be possible
1546  // copy the value directly. Do not try duplicate the def.
1547  // If cross copy register class is not the same as RC, then it's possible to
1548  // copy the value but it require cross register class copies and it is
1549  // expensive.
1550  // If cross copy register class is null, then it's not possible to copy
1551  // the value at all.
1552  SUnit *NewDef = nullptr;
1553  if (DestRC != RC) {
1554  NewDef = CopyAndMoveSuccessors(LRDef);
1555  if (!DestRC && !NewDef)
1556  report_fatal_error("Can't handle live physical register dependency!");
1557  }
1558  if (!NewDef) {
1559  // Issue copies, these can be expensive cross register class copies.
1561  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1562  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1563  << " to SU #" << Copies.front()->NodeNum << "\n");
1564  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1565  NewDef = Copies.back();
1566  }
1567 
1568  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1569  << " to SU #" << TrySU->NodeNum << "\n");
1570  LiveRegDefs[Reg] = NewDef;
1571  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1572  TrySU->isAvailable = false;
1573  CurSU = NewDef;
1574  }
1575  assert(CurSU && "Unable to resolve live physical register dependencies!");
1576  return CurSU;
1577 }
1578 
1579 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1580 /// schedulers.
1581 void ScheduleDAGRRList::ListScheduleBottomUp() {
1582  // Release any predecessors of the special Exit node.
1583  ReleasePredecessors(&ExitSU);
1584 
1585  // Add root to Available queue.
1586  if (!SUnits.empty()) {
1587  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1588  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1589  RootSU->isAvailable = true;
1590  AvailableQueue->push(RootSU);
1591  }
1592 
1593  // While Available queue is not empty, grab the node with the highest
1594  // priority. If it is not ready put it back. Schedule the node.
1595  Sequence.reserve(SUnits.size());
1596  while (!AvailableQueue->empty() || !Interferences.empty()) {
1597  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1598  AvailableQueue->dump(this));
1599 
1600  // Pick the best node to schedule taking all constraints into
1601  // consideration.
1602  SUnit *SU = PickNodeToScheduleBottomUp();
1603 
1604  AdvancePastStalls(SU);
1605 
1606  ScheduleNodeBottomUp(SU);
1607 
1608  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1609  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1610  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1611  "MinAvailableCycle uninitialized");
1612  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1613  }
1614  }
1615 
1616  // Reverse the order if it is bottom up.
1617  std::reverse(Sequence.begin(), Sequence.end());
1618 
1619 #ifndef NDEBUG
1620  VerifyScheduledSequence(/*isBottomUp=*/true);
1621 #endif
1622 }
1623 
1624 namespace {
1625 
1626 class RegReductionPQBase;
1627 
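// queue_sort - Common base class for the priority comparators below. The
// default isReady() accepts every candidate; a comparator's own isReady() is
// only consulted when its HasReadyFilter flag is set.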
1628 struct queue_sort {
1629  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1630 };
1631 
1632 #ifndef NDEBUG
1633 template<class SF>
1634 struct reverse_sort : public queue_sort {
1635  SF &SortFunc;
1636 
1637  reverse_sort(SF &sf) : SortFunc(sf) {}
1638 
1639  bool operator()(SUnit* left, SUnit* right) const {
1640  // reverse left/right rather than simply !SortFunc(left, right)
1641  // to expose different paths in the comparison logic.
1642  return SortFunc(right, left);
1643  }
1644 };
1645 #endif // NDEBUG
1646 
1647 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1648 // reduction scheduler.
1649 struct bu_ls_rr_sort : public queue_sort {
1650  enum {
1651  IsBottomUp = true,
1652  HasReadyFilter = false
1653  };
1654 
1655  RegReductionPQBase *SPQ;
1656 
1657  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1658 
1659  bool operator()(SUnit* left, SUnit* right) const;
1660 };
1661 
1662 // src_ls_rr_sort - Priority function for source order scheduler.
1663 struct src_ls_rr_sort : public queue_sort {
1664  enum {
1665  IsBottomUp = true,
1666  HasReadyFilter = false
1667  };
1668 
1669  RegReductionPQBase *SPQ;
1670 
1671  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1672 
1673  bool operator()(SUnit* left, SUnit* right) const;
1674 };
1675 
1676 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1677 struct hybrid_ls_rr_sort : public queue_sort {
1678  enum {
1679  IsBottomUp = true,
1680  HasReadyFilter = false
1681  };
1682 
1683  RegReductionPQBase *SPQ;
1684 
1685  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1686 
1687  bool isReady(SUnit *SU, unsigned CurCycle) const;
1688 
1689  bool operator()(SUnit* left, SUnit* right) const;
1690 };
1691 
1692 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1693 // scheduler.
1694 struct ilp_ls_rr_sort : public queue_sort {
1695  enum {
1696  IsBottomUp = true,
1697  HasReadyFilter = false
1698  };
1699 
1700  RegReductionPQBase *SPQ;
1701 
1702  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1703 
1704  bool isReady(SUnit *SU, unsigned CurCycle) const;
1705 
1706  bool operator()(SUnit* left, SUnit* right) const;
1707 };
1708 
1709 class RegReductionPQBase : public SchedulingPriorityQueue {
1710 protected:
1711  std::vector<SUnit *> Queue;
1712  unsigned CurQueueId = 0;
1713  bool TracksRegPressure;
1714  bool SrcOrder;
1715 
1716  // SUnits - The SUnits for the current graph.
1717  std::vector<SUnit> *SUnits;
1718 
1719  MachineFunction &MF;
1720  const TargetInstrInfo *TII;
1721  const TargetRegisterInfo *TRI;
1722  const TargetLowering *TLI;
1723  ScheduleDAGRRList *scheduleDAG = nullptr;
1724 
1725  // SethiUllmanNumbers - The SethiUllman number for each node.
1726  std::vector<unsigned> SethiUllmanNumbers;
1727 
1728  /// RegPressure - Tracking current reg pressure per register class.
1729  std::vector<unsigned> RegPressure;
1730 
1731  /// RegLimit - Tracking the number of allocatable registers per register
1732  /// class.
1733  std::vector<unsigned> RegLimit;
1734 
1735 public:
1736  RegReductionPQBase(MachineFunction &mf,
1737  bool hasReadyFilter,
1738  bool tracksrp,
1739  bool srcorder,
1740  const TargetInstrInfo *tii,
1741  const TargetRegisterInfo *tri,
1742  const TargetLowering *tli)
1743  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1744  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1745  if (TracksRegPressure) {
1746  unsigned NumRC = TRI->getNumRegClasses();
1747  RegLimit.resize(NumRC);
1748  RegPressure.resize(NumRC);
1749  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1750  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1751  for (const TargetRegisterClass *RC : TRI->regclasses())
1752  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1753  }
1754  }
1755 
1756  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1757  scheduleDAG = scheduleDag;
1758  }
1759 
1760  ScheduleHazardRecognizer* getHazardRec() {
1761  return scheduleDAG->getHazardRec();
1762  }
1763 
1764  void initNodes(std::vector<SUnit> &sunits) override;
1765 
1766  void addNode(const SUnit *SU) override;
1767 
1768  void updateNode(const SUnit *SU) override;
1769 
1770  void releaseState() override {
1771  SUnits = nullptr;
1772  SethiUllmanNumbers.clear();
1773  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1774  }
1775 
1776  unsigned getNodePriority(const SUnit *SU) const;
1777 
1778  unsigned getNodeOrdering(const SUnit *SU) const {
1779  if (!SU->getNode()) return 0;
1780 
1781  return SU->getNode()->getIROrder();
1782  }
1783 
1784  bool empty() const override { return Queue.empty(); }
1785 
1786  void push(SUnit *U) override {
1787  assert(!U->NodeQueueId && "Node in the queue already");
1788  U->NodeQueueId = ++CurQueueId;
1789  Queue.push_back(U);
1790  }
1791 
1792  void remove(SUnit *SU) override {
1793  assert(!Queue.empty() && "Queue is empty!");
1794  assert(SU->NodeQueueId != 0 && "Not in queue!");
1795  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1796  if (I != std::prev(Queue.end()))
1797  std::swap(*I, Queue.back());
1798  Queue.pop_back();
1799  SU->NodeQueueId = 0;
1800  }
1801 
1802  bool tracksRegPressure() const override { return TracksRegPressure; }
1803 
1804  void dumpRegPressure() const;
1805 
1806  bool HighRegPressure(const SUnit *SU) const;
1807 
1808  bool MayReduceRegPressure(SUnit *SU) const;
1809 
1810  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1811 
1812  void scheduledNode(SUnit *SU) override;
1813 
1814  void unscheduledNode(SUnit *SU) override;
1815 
1816 protected:
1817  bool canClobber(const SUnit *SU, const SUnit *Op);
1818  void AddPseudoTwoAddrDeps();
1819  void PrescheduleNodesWithMultipleUses();
1820  void CalculateSethiUllmanNumbers();
1821 };
1822 
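// popFromQueueImpl - Linear scan of the queue for the best candidate according
// to Picker (Picker(A, B) returning true means B is preferred over A); the
// chosen element is swapped with the back so removal is O(1).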
1823 template<class SF>
1824 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1825  std::vector<SUnit *>::iterator Best = Q.begin();
1826  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1827  if (Picker(*Best, *I))
1828  Best = I;
1829  SUnit *V = *Best;
1830  if (Best != std::prev(Q.end()))
1831  std::swap(*Best, Q.back());
1832  Q.pop_back();
1833  return V;
1834 }
1835 
1836 template<class SF>
1837 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1838 #ifndef NDEBUG
1839  if (DAG->StressSched) {
1840  reverse_sort<SF> RPicker(Picker);
1841  return popFromQueueImpl(Q, RPicker);
1842  }
1843 #endif
1844  (void)DAG;
1845  return popFromQueueImpl(Q, Picker);
1846 }
1847 
1848 //===----------------------------------------------------------------------===//
1849 // RegReductionPriorityQueue Definition
1850 //===----------------------------------------------------------------------===//
1851 //
1852 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1853 // to reduce register pressure.
1854 //
1855 template<class SF>
1856 class RegReductionPriorityQueue : public RegReductionPQBase {
1857  SF Picker;
1858 
1859 public:
1860  RegReductionPriorityQueue(MachineFunction &mf,
1861  bool tracksrp,
1862  bool srcorder,
1863  const TargetInstrInfo *tii,
1864  const TargetRegisterInfo *tri,
1865  const TargetLowering *tli)
1866  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1867  tii, tri, tli),
1868  Picker(this) {}
1869 
1870  bool isBottomUp() const override { return SF::IsBottomUp; }
1871 
1872  bool isReady(SUnit *U) const override {
1873  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1874  }
1875 
1876  SUnit *pop() override {
1877  if (Queue.empty()) return nullptr;
1878 
1879  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1880  V->NodeQueueId = 0;
1881  return V;
1882  }
1883 
1884 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1885  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1886  // Emulate pop() without clobbering NodeQueueIds.
1887  std::vector<SUnit *> DumpQueue = Queue;
1888  SF DumpPicker = Picker;
1889  while (!DumpQueue.empty()) {
1890  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1891  dbgs() << "Height " << SU->getHeight() << ": ";
1892  DAG->dumpNode(*SU);
1893  }
1894  }
1895 #endif
1896 };
1897 
1898 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1899 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1900 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1901 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1902 
1903 } // end anonymous namespace
1904 
1905 //===----------------------------------------------------------------------===//
1906 // Static Node Priority for Register Pressure Reduction
1907 //===----------------------------------------------------------------------===//
1908 
1909 // Check for special nodes that bypass scheduling heuristics.
1910 // Currently this pushes TokenFactor nodes down, but may be used for other
1911 // pseudo-ops as well.
1912 //
1913 // Return -1 to schedule right above left, 1 for left above right.
1914 // Return 0 if no bias exists.
1915 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1916  bool LSchedLow = left->isScheduleLow;
1917  bool RSchedLow = right->isScheduleLow;
1918  if (LSchedLow != RSchedLow)
1919  return LSchedLow < RSchedLow ? 1 : -1;
1920  return 0;
1921 }
1922 
1923 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1924 /// Smaller number is the higher priority.
1925 static unsigned
1926 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1927  if (SUNumbers[SU->NodeNum] != 0)
1928  return SUNumbers[SU->NodeNum];
1929 
1930  // Use WorkList to avoid stack overflow on excessively large IRs.
1931  struct WorkState {
1932  WorkState(const SUnit *SU) : SU(SU) {}
1933  const SUnit *SU;
1934  unsigned PredsProcessed = 0;
1935  };
1936 
1937  SmallVector<WorkState, 16> WorkList;
1938  WorkList.push_back(SU);
1939  while (!WorkList.empty()) {
1940  auto &Temp = WorkList.back();
1941  auto *TempSU = Temp.SU;
1942  bool AllPredsKnown = true;
1943  // Try to find a non-evaluated pred and push it into the processing stack.
1944  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1945  auto &Pred = TempSU->Preds[P];
1946  if (Pred.isCtrl()) continue; // ignore chain preds
1947  SUnit *PredSU = Pred.getSUnit();
1948  if (SUNumbers[PredSU->NodeNum] == 0) {
1949 #ifndef NDEBUG
1950  // In debug mode, check that we don't have such element in the stack.
1951  for (auto It : WorkList)
1952  assert(It.SU != PredSU && "Trying to push an element twice?");
1953 #endif
1954  // Next time start processing this one starting from the next pred.
1955  Temp.PredsProcessed = P + 1;
1956  WorkList.push_back(PredSU);
1957  AllPredsKnown = false;
1958  break;
1959  }
1960  }
1961 
1962  if (!AllPredsKnown)
1963  continue;
1964 
1965  // Once all preds are known, we can calculate the answer for this one.
1966  unsigned SethiUllmanNumber = 0;
1967  unsigned Extra = 0;
1968  for (const SDep &Pred : TempSU->Preds) {
1969  if (Pred.isCtrl()) continue; // ignore chain preds
1970  SUnit *PredSU = Pred.getSUnit();
1971  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1972  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1973  if (PredSethiUllman > SethiUllmanNumber) {
1974  SethiUllmanNumber = PredSethiUllman;
1975  Extra = 0;
1976  } else if (PredSethiUllman == SethiUllmanNumber)
1977  ++Extra;
1978  }
1979 
1980  SethiUllmanNumber += Extra;
1981  if (SethiUllmanNumber == 0)
1982  SethiUllmanNumber = 1;
1983  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1984  WorkList.pop_back();
1985  }
1986 
1987  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1988  return SUNumbers[SU->NodeNum];
1989 }
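
// Illustrative example of the computation above: a leaf SU with no data preds
// gets number 1. A node whose two data preds both have number 1 gets 1 + 1 = 2
// (the maximum plus one for each additional pred that ties the maximum), while
// a node with preds numbered 2 and 1 keeps 2, since its subtrees can be
// evaluated one after another without holding extra values live.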
1990 
1991 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1992 /// scheduling units.
1993 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1994  SethiUllmanNumbers.assign(SUnits->size(), 0);
1995 
1996  for (const SUnit &SU : *SUnits)
1997  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1998 }
1999 
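// addNode - Ensure the Sethi-Ullman table is large enough for a newly created
// SUnit and compute its number.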
2000 void RegReductionPQBase::addNode(const SUnit *SU) {
2001  unsigned SUSize = SethiUllmanNumbers.size();
2002  if (SUnits->size() > SUSize)
2003  SethiUllmanNumbers.resize(SUSize*2, 0);
2004  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2005 }
2006 
2007 void RegReductionPQBase::updateNode(const SUnit *SU) {
2008  SethiUllmanNumbers[SU->NodeNum] = 0;
2009  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2010 }
2011 
2012 // Lower priority means schedule further down. For bottom-up scheduling, lower
2013 // priority SUs are scheduled before higher priority SUs.
2014 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2015  assert(SU->NodeNum < SethiUllmanNumbers.size());
2016  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2017  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2018  // CopyToReg should be close to its uses to facilitate coalescing and
2019  // avoid spilling.
2020  return 0;
2021  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2022  Opc == TargetOpcode::SUBREG_TO_REG ||
2023  Opc == TargetOpcode::INSERT_SUBREG)
2024  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2025  // close to their uses to facilitate coalescing.
2026  return 0;
2027  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2028  // If SU does not have a register use, i.e. it doesn't produce a value
2029  // that would be consumed (e.g. store), then it terminates a chain of
2030  // computation. Give it a large SethiUllman number so it will be
 2031  // scheduled right before its predecessors so that it doesn't lengthen
2032  // their live ranges.
2033  return 0xffff;
2034  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2035  // If SU does not have a register def, schedule it close to its uses
2036  // because it does not lengthen any live ranges.
2037  return 0;
2038 #if 1
2039  return SethiUllmanNumbers[SU->NodeNum];
2040 #else
2041  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2042  if (SU->isCallOp) {
2043  // FIXME: This assumes all of the defs are used as call operands.
2044  int NP = (int)Priority - SU->getNode()->getNumValues();
2045  return (NP > 0) ? NP : 0;
2046  }
2047  return Priority;
2048 #endif
2049 }
2050 
2051 //===----------------------------------------------------------------------===//
2052 // Register Pressure Tracking
2053 //===----------------------------------------------------------------------===//
2054 
2055 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2056 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2057  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2058  unsigned Id = RC->getID();
2059  unsigned RP = RegPressure[Id];
2060  if (!RP) continue;
2061  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2062  << RegLimit[Id] << '\n');
2063  }
2064 }
2065 #endif
2066 
2067 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2068  if (!TLI)
2069  return false;
2070 
2071  for (const SDep &Pred : SU->Preds) {
2072  if (Pred.isCtrl())
2073  continue;
2074  SUnit *PredSU = Pred.getSUnit();
2075  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2076  // to cover the number of registers defined (they are all live).
2077  if (PredSU->NumRegDefsLeft == 0) {
2078  continue;
2079  }
2080  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2081  RegDefPos.IsValid(); RegDefPos.Advance()) {
2082  unsigned RCId, Cost;
2083  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2084 
2085  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2086  return true;
2087  }
2088  }
2089  return false;
2090 }
2091 
2092 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2093  const SDNode *N = SU->getNode();
2094 
2095  if (!N->isMachineOpcode() || !SU->NumSuccs)
2096  return false;
2097 
2098  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2099  for (unsigned i = 0; i != NumDefs; ++i) {
2100  MVT VT = N->getSimpleValueType(i);
2101  if (!N->hasAnyUseOfValue(i))
2102  continue;
2103  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2104  if (RegPressure[RCId] >= RegLimit[RCId])
2105  return true;
2106  }
2107  return false;
2108 }
2109 
2110 // Compute the register pressure contribution by this instruction by counting up
2111 // for uses that are not live and down for defs. Only count register classes
2112 // that are already under high pressure. As a side effect, compute the number of
2113 // uses of registers that are already live.
2114 //
2115 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2116 // so could probably be factored.
2117 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2118  LiveUses = 0;
2119  int PDiff = 0;
2120  for (const SDep &Pred : SU->Preds) {
2121  if (Pred.isCtrl())
2122  continue;
2123  SUnit *PredSU = Pred.getSUnit();
2124  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2125  // to cover the number of registers defined (they are all live).
2126  if (PredSU->NumRegDefsLeft == 0) {
2127  if (PredSU->getNode()->isMachineOpcode())
2128  ++LiveUses;
2129  continue;
2130  }
2131  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2132  RegDefPos.IsValid(); RegDefPos.Advance()) {
2133  MVT VT = RegDefPos.GetValue();
2134  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2135  if (RegPressure[RCId] >= RegLimit[RCId])
2136  ++PDiff;
2137  }
2138  }
2139  const SDNode *N = SU->getNode();
2140 
2141  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2142  return PDiff;
2143 
2144  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2145  for (unsigned i = 0; i != NumDefs; ++i) {
2146  MVT VT = N->getSimpleValueType(i);
2147  if (!N->hasAnyUseOfValue(i))
2148  continue;
2149  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2150  if (RegPressure[RCId] >= RegLimit[RCId])
2151  --PDiff;
2152  }
2153  return PDiff;
2154 }
2155 
2156 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2157  if (!TracksRegPressure)
2158  return;
2159 
2160  if (!SU->getNode())
2161  return;
2162 
2163  for (const SDep &Pred : SU->Preds) {
2164  if (Pred.isCtrl())
2165  continue;
2166  SUnit *PredSU = Pred.getSUnit();
2167  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2168  // to cover the number of registers defined (they are all live).
2169  if (PredSU->NumRegDefsLeft == 0) {
2170  continue;
2171  }
2172  // FIXME: The ScheduleDAG currently loses information about which of a
2173  // node's values is consumed by each dependence. Consequently, if the node
2174  // defines multiple register classes, we don't know which to pressurize
2175  // here. Instead the following loop consumes the register defs in an
2176  // arbitrary order. At least it handles the common case of clustered loads
2177  // to the same class. For precise liveness, each SDep needs to indicate the
2178  // result number. But that tightly couples the ScheduleDAG with the
2179  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2180  // value type or register class to SDep.
2181  //
2182  // The most important aspect of register tracking is balancing the increase
2183  // here with the reduction further below. Note that this SU may use multiple
 2184  // defs in PredSU. They can't be determined here, but we've already
2185  // compensated by reducing NumRegDefsLeft in PredSU during
2186  // ScheduleDAGSDNodes::AddSchedEdges.
2187  --PredSU->NumRegDefsLeft;
2188  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2189  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2190  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2191  if (SkipRegDefs)
2192  continue;
2193 
2194  unsigned RCId, Cost;
2195  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2196  RegPressure[RCId] += Cost;
2197  break;
2198  }
2199  }
2200 
2201  // We should have this assert, but there may be dead SDNodes that never
2202  // materialize as SUnits, so they don't appear to generate liveness.
2203  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2204  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2205  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2206  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2207  if (SkipRegDefs > 0)
2208  continue;
2209  unsigned RCId, Cost;
2210  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2211  if (RegPressure[RCId] < Cost) {
2212  // Register pressure tracking is imprecise. This can happen. But we try
2213  // hard not to let it happen because it likely results in poor scheduling.
2214  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2215  << ") has too many regdefs\n");
2216  RegPressure[RCId] = 0;
2217  }
2218  else {
2219  RegPressure[RCId] -= Cost;
2220  }
2221  }
2222  LLVM_DEBUG(dumpRegPressure());
2223 }
2224 
2225 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2226  if (!TracksRegPressure)
2227  return;
2228 
2229  const SDNode *N = SU->getNode();
2230  if (!N) return;
2231 
2232  if (!N->isMachineOpcode()) {
2233  if (N->getOpcode() != ISD::CopyToReg)
2234  return;
2235  } else {
2236  unsigned Opc = N->getMachineOpcode();
2237  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2238  Opc == TargetOpcode::INSERT_SUBREG ||
2239  Opc == TargetOpcode::SUBREG_TO_REG ||
2240  Opc == TargetOpcode::REG_SEQUENCE ||
2241  Opc == TargetOpcode::IMPLICIT_DEF)
2242  return;
2243  }
2244 
2245  for (const SDep &Pred : SU->Preds) {
2246  if (Pred.isCtrl())
2247  continue;
2248  SUnit *PredSU = Pred.getSUnit();
2249  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2250  // counts data deps.
2251  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2252  continue;
2253  const SDNode *PN = PredSU->getNode();
2254  if (!PN->isMachineOpcode()) {
2255  if (PN->getOpcode() == ISD::CopyFromReg) {
2256  MVT VT = PN->getSimpleValueType(0);
2257  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2258  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2259  }
2260  continue;
2261  }
2262  unsigned POpc = PN->getMachineOpcode();
2263  if (POpc == TargetOpcode::IMPLICIT_DEF)
2264  continue;
2265  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2266  POpc == TargetOpcode::INSERT_SUBREG ||
2267  POpc == TargetOpcode::SUBREG_TO_REG) {
2268  MVT VT = PN->getSimpleValueType(0);
2269  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2270  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2271  continue;
2272  }
2273  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2274  for (unsigned i = 0; i != NumDefs; ++i) {
2275  MVT VT = PN->getSimpleValueType(i);
2276  if (!PN->hasAnyUseOfValue(i))
2277  continue;
2278  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2279  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2280  // Register pressure tracking is imprecise. This can happen.
2281  RegPressure[RCId] = 0;
2282  else
2283  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2284  }
2285  }
2286 
2287  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2288  // may transfer data dependencies to CopyToReg.
2289  if (SU->NumSuccs && N->isMachineOpcode()) {
2290  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2291  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2292  MVT VT = N->getSimpleValueType(i);
2293  if (VT == MVT::Glue || VT == MVT::Other)
2294  continue;
2295  if (!N->hasAnyUseOfValue(i))
2296  continue;
2297  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2298  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2299  }
2300  }
2301 
2302  LLVM_DEBUG(dumpRegPressure());
2303 }
2304 
2305 //===----------------------------------------------------------------------===//
2306 // Dynamic Node Priority for Register Pressure Reduction
2307 //===----------------------------------------------------------------------===//
2308 
2309 /// closestSucc - Returns the scheduled cycle of the successor which is
2310 /// closest to the current cycle.
2311 static unsigned closestSucc(const SUnit *SU) {
2312  unsigned MaxHeight = 0;
2313  for (const SDep &Succ : SU->Succs) {
2314  if (Succ.isCtrl()) continue; // ignore chain succs
2315  unsigned Height = Succ.getSUnit()->getHeight();
 2316  // If there are a bunch of CopyToRegs stacked up, they should be considered
2317  // to be at the same position.
2318  if (Succ.getSUnit()->getNode() &&
2319  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2320  Height = closestSucc(Succ.getSUnit())+1;
2321  if (Height > MaxHeight)
2322  MaxHeight = Height;
2323  }
2324  return MaxHeight;
2325 }
2326 
 2327 /// calcMaxScratches - Returns a cost estimate of the worst case requirement
2328 /// for scratch registers, i.e. number of data dependencies.
2329 static unsigned calcMaxScratches(const SUnit *SU) {
2330  unsigned Scratches = 0;
2331  for (const SDep &Pred : SU->Preds) {
2332  if (Pred.isCtrl()) continue; // ignore chain preds
2333  Scratches++;
2334  }
2335  return Scratches;
2336 }
2337 
2338 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2339 /// CopyFromReg from a virtual register.
2340 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2341  bool RetVal = false;
2342  for (const SDep &Pred : SU->Preds) {
2343  if (Pred.isCtrl()) continue;
2344  const SUnit *PredSU = Pred.getSUnit();
2345  if (PredSU->getNode() &&
2346  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2347  unsigned Reg =
 2348  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
 2349  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2350  RetVal = true;
2351  continue;
2352  }
2353  }
2354  return false;
2355  }
2356  return RetVal;
2357 }
2358 
2359 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2360 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2361 /// it has no other use. It should be scheduled closer to the terminator.
2362 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2363  bool RetVal = false;
2364  for (const SDep &Succ : SU->Succs) {
2365  if (Succ.isCtrl()) continue;
2366  const SUnit *SuccSU = Succ.getSUnit();
2367  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2368  unsigned Reg =
 2369  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
 2370  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2371  RetVal = true;
2372  continue;
2373  }
2374  }
2375  return false;
2376  }
2377  return RetVal;
2378 }
2379 
2380 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2381 // set isVRegCycle for its CopyFromReg operands.
2382 //
2383 // This is only relevant for single-block loops, in which case the VRegCycle
2384 // node is likely an induction variable in which the operand and target virtual
2385 // registers should be coalesced (e.g. pre/post increment values). Setting the
2386 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2387 // CopyFromReg so that this node becomes the virtual register "kill". This
2388 // avoids interference between the values live in and out of the block and
2389 // eliminates a copy inside the loop.
2390 static void initVRegCycle(SUnit *SU) {
 2391  if (DisableSchedVRegCycle)
 2392  return;
2393 
2394  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2395  return;
2396 
2397  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2398 
2399  SU->isVRegCycle = true;
2400 
2401  for (const SDep &Pred : SU->Preds) {
2402  if (Pred.isCtrl()) continue;
2403  Pred.getSUnit()->isVRegCycle = true;
2404  }
2405 }
2406 
2407 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2408 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2409 static void resetVRegCycle(SUnit *SU) {
2410  if (!SU->isVRegCycle)
2411  return;
2412 
2413  for (const SDep &Pred : SU->Preds) {
2414  if (Pred.isCtrl()) continue; // ignore chain preds
2415  SUnit *PredSU = Pred.getSUnit();
2416  if (PredSU->isVRegCycle) {
2417  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2418  "VRegCycle def must be CopyFromReg");
2419  Pred.getSUnit()->isVRegCycle = false;
2420  }
2421  }
2422 }
2423 
2424 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2425 // means a node that defines the VRegCycle has not been scheduled yet.
2426 static bool hasVRegCycleUse(const SUnit *SU) {
2427  // If this SU also defines the VReg, don't hoist it as a "use".
2428  if (SU->isVRegCycle)
2429  return false;
2430 
2431  for (const SDep &Pred : SU->Preds) {
2432  if (Pred.isCtrl()) continue; // ignore chain preds
2433  if (Pred.getSUnit()->isVRegCycle &&
2434  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2435  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2436  return true;
2437  }
2438  }
2439  return false;
2440 }
2441 
2442 // Check for either a dependence (latency) or resource (hazard) stall.
2443 //
2444 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2445 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2446  if ((int)SPQ->getCurCycle() < Height) return true;
 2447  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2448  != ScheduleHazardRecognizer::NoHazard)
 2449  return true;
2450  return false;
2451 }
2452 
2453 // Return -1 if left has higher priority, 1 if right has higher priority.
2454 // Return 0 if latency-based priority is equivalent.
2455 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2456  RegReductionPQBase *SPQ) {
2457  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2458  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2459  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2460  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2461  int LHeight = (int)left->getHeight() + LPenalty;
2462  int RHeight = (int)right->getHeight() + RPenalty;
2463 
2464  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2465  BUHasStall(left, LHeight, SPQ);
2466  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2467  BUHasStall(right, RHeight, SPQ);
2468 
 2469  // If scheduling one of the nodes will cause a pipeline stall, delay it.
 2470  // If scheduling either one of the nodes will cause a pipeline stall, sort
2471  // them according to their height.
2472  if (LStall) {
2473  if (!RStall)
2474  return 1;
2475  if (LHeight != RHeight)
2476  return LHeight > RHeight ? 1 : -1;
2477  } else if (RStall)
2478  return -1;
2479 
2480  // If either node is scheduling for latency, sort them by height/depth
2481  // and latency.
2482  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2483  right->SchedulingPref == Sched::ILP)) {
2484  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
 2485  // is enabled, grouping instructions by cycle, then their heights are already
 2486  // covered so only their depths matter. We also reach this point if both stall
2487  // but have the same height.
2488  if (!SPQ->getHazardRec()->isEnabled()) {
2489  if (LHeight != RHeight)
2490  return LHeight > RHeight ? 1 : -1;
2491  }
2492  int LDepth = left->getDepth() - LPenalty;
2493  int RDepth = right->getDepth() - RPenalty;
2494  if (LDepth != RDepth) {
2495  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2496  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2497  << ") depth " << RDepth << "\n");
2498  return LDepth < RDepth ? 1 : -1;
2499  }
2500  if (left->Latency != right->Latency)
2501  return left->Latency > right->Latency ? 1 : -1;
2502  }
2503  return 0;
2504 }
2505 
2506 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2507  // Schedule physical register definitions close to their use. This is
2508  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2509  // long as shortening physreg live ranges is generally good, we can defer
2510  // creating a subtarget hook.
2511  if (!DisableSchedPhysRegJoin) {
2512  bool LHasPhysReg = left->hasPhysRegDefs;
2513  bool RHasPhysReg = right->hasPhysRegDefs;
2514  if (LHasPhysReg != RHasPhysReg) {
2515  #ifndef NDEBUG
2516  static const char *const PhysRegMsg[] = { " has no physreg",
2517  " defines a physreg" };
2518  #endif
2519  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2520  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2521  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2522  return LHasPhysReg < RHasPhysReg;
2523  }
2524  }
2525 
 2526  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2527  unsigned LPriority = SPQ->getNodePriority(left);
2528  unsigned RPriority = SPQ->getNodePriority(right);
2529 
2530  // Be really careful about hoisting call operands above previous calls.
 2531  // Only allow it if it would reduce register pressure.
2532  if (left->isCall && right->isCallOp) {
2533  unsigned RNumVals = right->getNode()->getNumValues();
2534  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2535  }
2536  if (right->isCall && left->isCallOp) {
2537  unsigned LNumVals = left->getNode()->getNumValues();
2538  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2539  }
2540 
2541  if (LPriority != RPriority)
2542  return LPriority > RPriority;
2543 
 2544  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
 2545  // the same, then keep source order.
2546  if (left->isCall || right->isCall) {
2547  unsigned LOrder = SPQ->getNodeOrdering(left);
2548  unsigned ROrder = SPQ->getNodeOrdering(right);
2549 
2550  // Prefer an ordering where the lower the non-zero order number, the higher
2551  // the preference.
2552  if ((LOrder || ROrder) && LOrder != ROrder)
2553  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2554  }
2555 
2556  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2557  // e.g.
2558  // t1 = op t2, c1
2559  // t3 = op t4, c2
2560  //
2561  // and the following instructions are both ready.
2562  // t2 = op c3
2563  // t4 = op c4
2564  //
2565  // Then schedule t2 = op first.
2566  // i.e.
2567  // t4 = op c4
2568  // t2 = op c3
2569  // t1 = op t2, c1
2570  // t3 = op t4, c2
2571  //
2572  // This creates more short live intervals.
2573  unsigned LDist = closestSucc(left);
2574  unsigned RDist = closestSucc(right);
2575  if (LDist != RDist)
2576  return LDist < RDist;
2577 
 2578  // How many registers become live when the node is scheduled.
2579  unsigned LScratch = calcMaxScratches(left);
2580  unsigned RScratch = calcMaxScratches(right);
2581  if (LScratch != RScratch)
2582  return LScratch > RScratch;
2583 
2584  // Comparing latency against a call makes little sense unless the node
2585  // is register pressure-neutral.
2586  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2587  return (left->NodeQueueId > right->NodeQueueId);
2588 
2589  // Do not compare latencies when one or both of the nodes are calls.
2590  if (!DisableSchedCycles &&
2591  !(left->isCall || right->isCall)) {
2592  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2593  if (result != 0)
2594  return result > 0;
2595  }
2596  else {
2597  if (left->getHeight() != right->getHeight())
2598  return left->getHeight() > right->getHeight();
2599 
2600  if (left->getDepth() != right->getDepth())
2601  return left->getDepth() < right->getDepth();
2602  }
2603 
2604  assert(left->NodeQueueId && right->NodeQueueId &&
2605  "NodeQueueId cannot be zero");
2606  return (left->NodeQueueId > right->NodeQueueId);
2607 }
2608 
2609 // Bottom up
2610 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2611  if (int res = checkSpecialNodes(left, right))
2612  return res > 0;
2613 
2614  return BURRSort(left, right, SPQ);
2615 }
2616 
2617 // Source order, otherwise bottom up.
2618 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2619  if (int res = checkSpecialNodes(left, right))
2620  return res > 0;
2621 
2622  unsigned LOrder = SPQ->getNodeOrdering(left);
2623  unsigned ROrder = SPQ->getNodeOrdering(right);
2624 
2625  // Prefer an ordering where the lower the non-zero order number, the higher
2626  // the preference.
2627  if ((LOrder || ROrder) && LOrder != ROrder)
2628  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2629 
2630  return BURRSort(left, right, SPQ);
2631 }
2632 
2633 // If the time between now and when the instruction will be ready can cover
2634 // the spill code, then avoid adding it to the ready queue. This gives long
2635 // stalls highest priority and allows hoisting across calls. It should also
2636 // speed up processing the available queue.
2637 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2638  static const unsigned ReadyDelay = 3;
2639 
2640  if (SPQ->MayReduceRegPressure(SU)) return true;
2641 
2642  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2643 
 2644  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
 2645  != ScheduleHazardRecognizer::NoHazard)
 2646  return false;
2647 
2648  return true;
2649 }
2650 
2651 // Return true if right should be scheduled with higher priority than left.
2652 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2653  if (int res = checkSpecialNodes(left, right))
2654  return res > 0;
2655 
2656  if (left->isCall || right->isCall)
2657  // No way to compute latency of calls.
2658  return BURRSort(left, right, SPQ);
2659 
2660  bool LHigh = SPQ->HighRegPressure(left);
2661  bool RHigh = SPQ->HighRegPressure(right);
2662  // Avoid causing spills. If register pressure is high, schedule for
2663  // register pressure reduction.
2664  if (LHigh && !RHigh) {
2665  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2666  << right->NodeNum << ")\n");
2667  return true;
2668  }
2669  else if (!LHigh && RHigh) {
2670  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2671  << left->NodeNum << ")\n");
2672  return false;
2673  }
2674  if (!LHigh && !RHigh) {
2675  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2676  if (result != 0)
2677  return result > 0;
2678  }
2679  return BURRSort(left, right, SPQ);
2680 }
2681 
2682 // Schedule as many instructions in each cycle as possible. So don't make an
2683 // instruction available unless it is ready in the current cycle.
2684 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2685  if (SU->getHeight() > CurCycle) return false;
2686 
 2687  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2688  != ScheduleHazardRecognizer::NoHazard)
 2689  return false;
2690 
2691  return true;
2692 }
2693 
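// canEnableCoalescing - Return true for nodes that getNodePriority also
// special-cases (TokenFactor/CopyToReg, the subreg pseudo-ops, and nodes with
// no data preds): keeping them close to their uses may enable coalescing and
// does not lengthen live ranges.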
2694 static bool canEnableCoalescing(SUnit *SU) {
2695  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2696  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2697  // CopyToReg should be close to its uses to facilitate coalescing and
2698  // avoid spilling.
2699  return true;
2700 
2701  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2702  Opc == TargetOpcode::SUBREG_TO_REG ||
2703  Opc == TargetOpcode::INSERT_SUBREG)
2704  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2705  // close to their uses to facilitate coalescing.
2706  return true;
2707 
2708  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2709  // If SU does not have a register def, schedule it close to its uses
2710  // because it does not lengthen any live ranges.
2711  return true;
2712 
2713  return false;
2714 }
2715 
2716 // list-ilp is currently an experimental scheduler that allows various
2717 // heuristics to be enabled prior to the normal register reduction logic.
2718 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2719  if (int res = checkSpecialNodes(left, right))
2720  return res > 0;
2721 
2722  if (left->isCall || right->isCall)
2723  // No way to compute latency of calls.
2724  return BURRSort(left, right, SPQ);
2725 
2726  unsigned LLiveUses = 0, RLiveUses = 0;
 2727  int LPDiff = 0, RPDiff = 0;
 2728  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
 2729  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2730  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2731  }
2732  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2733  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2734  << "): " << LPDiff << " != SU(" << right->NodeNum
2735  << "): " << RPDiff << "\n");
2736  return LPDiff > RPDiff;
2737  }
2738 
2739  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2740  bool LReduce = canEnableCoalescing(left);
2741  bool RReduce = canEnableCoalescing(right);
2742  if (LReduce && !RReduce) return false;
2743  if (RReduce && !LReduce) return true;
2744  }
2745 
2746  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2747  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2748  << " != SU(" << right->NodeNum << "): " << RLiveUses
2749  << "\n");
2750  return LLiveUses < RLiveUses;
2751  }
2752 
2753  if (!DisableSchedStalls) {
2754  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2755  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2756  if (LStall != RStall)
2757  return left->getHeight() > right->getHeight();
2758  }
2759 
2760  if (!DisableSchedCriticalPath) {
2761  int spread = (int)left->getDepth() - (int)right->getDepth();
2762  if (std::abs(spread) > MaxReorderWindow) {
2763  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2764  << left->getDepth() << " != SU(" << right->NodeNum
2765  << "): " << right->getDepth() << "\n");
2766  return left->getDepth() < right->getDepth();
2767  }
2768  }
2769 
2770  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2771  int spread = (int)left->getHeight() - (int)right->getHeight();
2772  if (std::abs(spread) > MaxReorderWindow)
2773  return left->getHeight() > right->getHeight();
2774  }
2775 
2776  return BURRSort(left, right, SPQ);
2777 }
2778 
2779 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2780  SUnits = &sunits;
2781  // Add pseudo dependency edges for two-address nodes.
2782  if (!Disable2AddrHack)
2783  AddPseudoTwoAddrDeps();
2784  // Reroute edges to nodes with multiple uses.
2785  if (!TracksRegPressure && !SrcOrder)
2786  PrescheduleNodesWithMultipleUses();
2787  // Calculate node priorities.
2788  CalculateSethiUllmanNumbers();
2789 
2790  // For single block loops, mark nodes that look like canonical IV increments.
2791  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2792  for (SUnit &SU : sunits)
2793  initVRegCycle(&SU);
2794 }
2795 
2796 //===----------------------------------------------------------------------===//
2797 // Preschedule for Register Pressure
2798 //===----------------------------------------------------------------------===//
2799 
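// canClobber - Return true if SU is a two-address instruction and one of its
// tied def&use operands is produced by the SUnit that Op originated from.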
2800 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2801  if (SU->isTwoAddress) {
2802  unsigned Opc = SU->getNode()->getMachineOpcode();
2803  const MCInstrDesc &MCID = TII->get(Opc);
2804  unsigned NumRes = MCID.getNumDefs();
2805  unsigned NumOps = MCID.getNumOperands() - NumRes;
2806  for (unsigned i = 0; i != NumOps; ++i) {
2807  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2808  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2809  if (DU->getNodeId() != -1 &&
2810  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2811  return true;
2812  }
2813  }
2814  }
2815  return false;
2816 }
2817 
 2818 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2819 /// successor's explicit physregs whose definition can reach DepSU.
2820 /// i.e. DepSU should not be scheduled above SU.
2821 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2822  ScheduleDAGRRList *scheduleDAG,
2823  const TargetInstrInfo *TII,
2824  const TargetRegisterInfo *TRI) {
2825  const MCPhysReg *ImpDefs
2826  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2827  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
 2828  if (!ImpDefs && !RegMask)
2829  return false;
2830 
2831  for (const SDep &Succ : SU->Succs) {
2832  SUnit *SuccSU = Succ.getSUnit();
2833  for (const SDep &SuccPred : SuccSU->Preds) {
2834  if (!SuccPred.isAssignedRegDep())
2835  continue;
2836 
2837  if (RegMask &&
2838  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2839  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2840  return true;
2841 
2842  if (ImpDefs)
2843  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2844  // Return true if SU clobbers this physical register use and the
2845  // definition of the register reaches from DepSU. IsReachable queries
2846  // a topological forward sort of the DAG (following the successors).
2847  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2848  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2849  return true;
2850  }
2851  }
2852  return false;
2853 }
2854 
2855 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2856 /// physical register defs.
2857 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2858  const TargetInstrInfo *TII,
2859  const TargetRegisterInfo *TRI) {
2860  SDNode *N = SuccSU->getNode();
2861  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2862  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2863  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2864  for (const SDNode *SUNode = SU->getNode(); SUNode;
2865  SUNode = SUNode->getGluedNode()) {
2866  if (!SUNode->isMachineOpcode())
2867  continue;
2868  const MCPhysReg *SUImpDefs =
2869  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2870  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2871  if (!SUImpDefs && !SURegMask)
2872  continue;
2873  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2874  MVT VT = N->getSimpleValueType(i);
2875  if (VT == MVT::Glue || VT == MVT::Other)
2876  continue;
2877  if (!N->hasAnyUseOfValue(i))
2878  continue;
2879  unsigned Reg = ImpDefs[i - NumDefs];
2880  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2881  return true;
2882  if (!SUImpDefs)
2883  continue;
2884  for (;*SUImpDefs; ++SUImpDefs) {
2885  unsigned SUReg = *SUImpDefs;
2886  if (TRI->regsOverlap(Reg, SUReg))
2887  return true;
2888  }
2889  }
2890  }
2891  return false;
2892 }
2893 
2894 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2895 /// are not handled well by the general register pressure reduction
2896 /// heuristics. When presented with code like this:
2897 ///
 2898 ///     N
 2899 ///    / \
 2900 ///   /   \
 2901 ///  U    store
 2902 ///  |
2903 /// ...
2904 ///
2905 /// the heuristics tend to push the store up, but since the
2906 /// operand of the store has another use (U), this would increase
2907 /// the length of that other use (the U->N edge).
2908 ///
2909 /// This function transforms code like the above to route U's
2910 /// dependence through the store when possible, like this:
2911 ///
 2912 ///     N
 2913 ///     ||
 2914 ///     ||
 2915 ///    store
 2916 ///      |
 2917 ///      U
 2918 ///      |
2919 /// ...
2920 ///
2921 /// This results in the store being scheduled immediately
2922 /// after N, which shortens the U->N live range, reducing
2923 /// register pressure.
2924 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2925  // Visit all the nodes in topological order, working top-down.
2926  for (SUnit &SU : *SUnits) {
2927  // For now, only look at nodes with no data successors, such as stores.
2928  // These are especially important, due to the heuristics in
2929  // getNodePriority for nodes with no data successors.
2930  if (SU.NumSuccs != 0)
2931  continue;
2932  // For now, only look at nodes with exactly one data predecessor.
2933  if (SU.NumPreds != 1)
2934  continue;
2935  // Avoid prescheduling copies to virtual registers, which don't behave
2936  // like other nodes from the perspective of scheduling heuristics.
2937  if (SDNode *N = SU.getNode())
 2938  if (N->getOpcode() == ISD::CopyToReg &&
 2939  TargetRegisterInfo::isVirtualRegister
 2940  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2941  continue;
2942 
2943  SDNode *PredFrameSetup = nullptr;
2944  for (const SDep &Pred : SU.Preds)
2945  if (Pred.isCtrl() && Pred.getSUnit()) {
 2946  // Find the predecessor which is not a data dependence.
2947  SDNode *PredND = Pred.getSUnit()->getNode();
2948 
 2949  // If PredND is a FrameSetup, we should not preschedule the node;
 2950  // otherwise, when scheduling bottom up, ADJCALLSTACKDOWN and
 2951  // ADJCALLSTACKUP may hold the CallResource for too long and prevent
 2952  // other calls from being scheduled. If there is no other available node
 2953  // to schedule, the scheduler will try to rename the register by
 2954  // creating a copy to avoid the conflict, which will fail because
 2955  // CallResource is not a real physical register.
2956  if (PredND && PredND->isMachineOpcode() &&
2957  (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2958  PredFrameSetup = PredND;
2959  break;
2960  }
2961  }
 2962  // Skip the node if it has a FrameSetup parent.
2963  if (PredFrameSetup != nullptr)
2964  continue;
2965 
2966  // Locate the single data predecessor.
2967  SUnit *PredSU = nullptr;
2968  for (const SDep &Pred : SU.Preds)
2969  if (!Pred.isCtrl()) {
2970  PredSU = Pred.getSUnit();
2971  break;
2972  }
2973  assert(PredSU);
2974 
2975  // Don't rewrite edges that carry physregs, because that requires additional
2976  // support infrastructure.
2977  if (PredSU->hasPhysRegDefs)
2978  continue;
2979  // Short-circuit the case where SU is PredSU's only data successor.
2980  if (PredSU->NumSuccs == 1)
2981  continue;
2982  // Avoid prescheduling to copies from virtual registers, which don't behave
2983  // like other nodes from the perspective of scheduling heuristics.
2984  if (SDNode *N = SU.getNode())
 2985  if (N->getOpcode() == ISD::CopyFromReg &&
 2986  TargetRegisterInfo::isVirtualRegister
 2987  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2988  continue;
2989 
2990  // Perform checks on the successors of PredSU.
2991  for (const SDep &PredSucc : PredSU->Succs) {
2992  SUnit *PredSuccSU = PredSucc.getSUnit();
2993  if (PredSuccSU == &SU) continue;
2994  // If PredSU has another successor with no data successors, for
2995  // now don't attempt to choose either over the other.
2996  if (PredSuccSU->NumSuccs == 0)
2997  goto outer_loop_continue;
2998  // Don't break physical register dependencies.
2999  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3000  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3001  goto outer_loop_continue;
3002  // Don't introduce graph cycles.
3003  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3004  goto outer_loop_continue;
3005  }
3006 
3007  // Ok, the transformation is safe and the heuristics suggest it is
3008  // profitable. Update the graph.
3009  LLVM_DEBUG(
3010  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3011  << PredSU->NodeNum
3012  << " to guide scheduling in the presence of multiple uses\n");
3013  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3014  SDep Edge = PredSU->Succs[i];
3015  assert(!Edge.isAssignedRegDep());
3016  SUnit *SuccSU = Edge.getSUnit();
3017  if (SuccSU != &SU) {
3018  Edge.setSUnit(PredSU);
3019  scheduleDAG->RemovePred(SuccSU, Edge);
3020  scheduleDAG->AddPred(&SU, Edge);
3021  Edge.setSUnit(&SU);
3022  scheduleDAG->AddPred(SuccSU, Edge);
3023  --i;
3024  }
3025  }
3026  outer_loop_continue:;
3027  }
3028 }
3029 
 3030 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
 3031 /// it as a def&use operand, add a pseudo control edge from it to the other
3032 /// node (if it won't create a cycle) so the two-address one will be scheduled
3033 /// first (lower in the schedule). If both nodes are two-address, favor the
3034 /// one that has a CopyToReg use (more likely to be a loop induction update).
3035 /// If both are two-address, but one is commutable while the other is not
3036 /// commutable, favor the one that's not commutable.
3037 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3038  for (SUnit &SU : *SUnits) {
3039  if (!SU.isTwoAddress)
3040  continue;
3041 
3042  SDNode *Node = SU.getNode();
3043  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3044  continue;
3045 
3046  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3047  unsigned Opc = Node->getMachineOpcode();
3048  const MCInstrDesc &MCID = TII->get(Opc);
3049  unsigned NumRes = MCID.getNumDefs();
3050  unsigned NumOps = MCID.getNumOperands() - NumRes;
3051  for (unsigned j = 0; j != NumOps; ++j) {
3052  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3053  continue;
3054  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3055  if (DU->getNodeId() == -1)
3056  continue;
3057  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3058  if (!DUSU)
3059  continue;
3060  for (const SDep &Succ : DUSU->Succs) {
3061  if (Succ.isCtrl())
3062  continue;
3063  SUnit *SuccSU = Succ.getSUnit();
3064  if (SuccSU == &SU)
3065  continue;
3066  // Be conservative. Ignore if nodes aren't at roughly the same
3067  // depth and height.
3068  if (SuccSU->getHeight() < SU.getHeight() &&
3069  (SU.getHeight() - SuccSU->getHeight()) > 1)
3070  continue;
3071  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3072  // constrains whatever is using the copy, instead of the copy
3073  // itself. In the case that the copy is coalesced, this
 3074  // preserves the intent of the pseudo two-address heuristics.
3075  while (SuccSU->Succs.size() == 1 &&
3076  SuccSU->getNode()->isMachineOpcode() &&
3077  SuccSU->getNode()->getMachineOpcode() ==
3078  TargetOpcode::COPY_TO_REGCLASS)
3079  SuccSU = SuccSU->Succs.front().getSUnit();
3080  // Don't constrain non-instruction nodes.
3081  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3082  continue;
3083  // Don't constrain nodes with physical register defs if the
3084  // predecessor can clobber them.
3085  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3086  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3087  continue;
3088  }
3089  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3090  // these may be coalesced away. We want them close to their uses.
3091  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3092  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3093  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3094  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3095  continue;
3096  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3097  (!canClobber(SuccSU, DUSU) ||
3098  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3099  (!SU.isCommutable && SuccSU->isCommutable)) &&
3100  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3101  LLVM_DEBUG(dbgs()
3102  << " Adding a pseudo-two-addr edge from SU #"
3103  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3104  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3105  }
3106  }
3107  }
3108  }
3109 }
3110 
3111 //===----------------------------------------------------------------------===//
3112 // Public Constructor Functions
3113 //===----------------------------------------------------------------------===//
3114 
3115 ScheduleDAGSDNodes *
3116 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3117  CodeGenOpt::Level OptLevel) {
3118  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3119  const TargetInstrInfo *TII = STI.getInstrInfo();
3120  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3121 
3122  BURegReductionPriorityQueue *PQ =
3123  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3124  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3125  PQ->setScheduleDAG(SD);
3126  return SD;
3127 }
3128 
3129 ScheduleDAGSDNodes *
3130 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3131  CodeGenOpt::Level OptLevel) {
3132  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3133  const TargetInstrInfo *TII = STI.getInstrInfo();
3134  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3135 
3136  SrcRegReductionPriorityQueue *PQ =
3137  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3138  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3139  PQ->setScheduleDAG(SD);
3140  return SD;
3141 }
3142 
3143 ScheduleDAGSDNodes *
3144 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3145  CodeGenOpt::Level OptLevel) {
3146  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3147  const TargetInstrInfo *TII = STI.getInstrInfo();
3148  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3149  const TargetLowering *TLI = IS->TLI;
3150 
3151  HybridBURRPriorityQueue *PQ =
3152  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3153 
3154  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3155  PQ->setScheduleDAG(SD);
3156  return SD;
3157 }
3158 
3159 ScheduleDAGSDNodes *
3160 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3161  CodeGenOpt::Level OptLevel) {
3162  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3163  const TargetInstrInfo *TII = STI.getInstrInfo();
3164  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3165  const TargetLowering *TLI = IS->TLI;
3166 
3167  ILPBURRPriorityQueue *PQ =
3168  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3169  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3170  PQ->setScheduleDAG(SD);
3171  return SD;
3172 }
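
The four factory functions above are normally reached through the scheduler registry rather than called directly: selecting one of the registered scheduler names (for example via llc's -pre-RA-sched flag) resolves to the matching create* function. As a rough illustration only, and not part of ScheduleDAGRRList.cpp, the sketch below shows how code that already holds a SelectionDAGISel instance could construct the source-order variant itself; the helper name buildSourceOrderScheduler and the choice of CodeGenOpt::Default are assumptions made for this example.

    // Illustrative sketch only; not part of this file.
    // Assumes a valid SelectionDAGISel *IS for the function being selected.
    #include "llvm/CodeGen/SchedulerRegistry.h" // declares the create*ListDAGScheduler factories
    #include "llvm/Support/CodeGen.h"           // CodeGenOpt::Level

    using namespace llvm;

    // Hypothetical helper (the name is ours, not LLVM's): build the "source"
    // variant of the register-reduction list scheduler. The factory pairs a
    // SrcRegReductionPriorityQueue with a new ScheduleDAGRRList and returns
    // the scheduler to the caller.
    static ScheduleDAGSDNodes *buildSourceOrderScheduler(SelectionDAGISel *IS) {
      return createSourceListDAGScheduler(IS, CodeGenOpt::Default);
    }

In an ordinary compile the registry-driven path is the usual entry point, so a direct call like this is mainly of interest for out-of-tree experiments with a specific scheduler variant.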