//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);

static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {

//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit *> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::unique_ptr<SUnit*[]> LiveRegDefs;
  std::unique_ptr<SUnit*[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;

  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;

  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
      : ScheduleDAGSDNodes(mf),
        NeedLatency(needlatency), AvailableQueue(availqueue),
        Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
  /// Does *NOT* update the topological ordering! It just queues an update.
  void AddPredQueued(SUnit *SU, const SDep &D) {
    Topo.AddPredQueued(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
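  // With cycle-level scheduling disabled, every available node is trivially
  // ready; otherwise defer to the queue's optional ready filter.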
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
           AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};

} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
                    << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle =
      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  LLVM_DEBUG(dump());
  Topo.MarkDirty();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  while (true) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  while (true) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors.
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
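    // Swap-and-pop: replace the released entry with the last element, shrink
    // the queue, and re-examine the element that was swapped into slot i.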
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  } else {
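    // Scheduling is bottom-up, so the hazard recognizer's notion of time runs
    // backwards; recede one cycle at a time until we reach NextCycle.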
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
                      << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
         "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, updating its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGens only if it was empty before this unscheduling.
      // This avoids incorrectly clobbering a value set by a previous
      // unscheduling.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make the node available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  } else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

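  // Re-emit the last LookAhead nodes of the schedule into the hazard
  // recognizer, receding the cycle as needed, so that its lookahead window
  // matches the state at CurCycle.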
  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to an earlier cycle by
/// unscheduling nodes up to and including BtSU, in order to make it possible
/// to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
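  // Pop nodes off the end of the schedule, unscheduling each one, until BtSU
  // itself has been unscheduled.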
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// TryUnfoldSU - Attempt to unfold a node that folds a load, splitting it into
/// a separate load and operation. Returns the new SUnit on success, the
/// original SU if unfolding would not help, or nullptr on failure.
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // Unfolding an x86 DEC64m operation results in a store, dec, and load,
  // which can't be handled here, so quit.
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location that produces the same type of value
  // but with different alignment or volatility.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  bool isNewN = true;
  SUnit *NewSU;
  // This can only happen when isNewLoad is false.
  if (N->getNodeId() != -1) {
    NewSU = &SUnits[N->getNodeId()];
    // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit to unfolding so just return SU.
    if (NewSU->isScheduled) {
      return SU;
    }
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);
  }

  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding, replace the DAG uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
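  // The old node's final value was its chain result; redirect its uses to the
  // unfolded load's chain result (value #1 of the load).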
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPredQueued(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPredQueued(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPredQueued(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPredQueued(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  if (isNewN)
    AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
  LLVM_DEBUG(dumpNode(*SU));

  if (N->getGluedNode() &&
      !TII->canCopyGluedNodeDuringSchedule(N)) {
    LLVM_DEBUG(
        dbgs()
        << "Giving up because it has incoming glue and the target does not "
           "want to copy it\n");
    return nullptr;
  }

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue) {
      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
      return nullptr;
    } else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
      LLVM_DEBUG(
          dbgs() << "Giving up because one of the operands is glue and "
                    "the target does not want to copy it\n");
      return nullptr;
    }
  }

  // If possible, unfold the instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled, don't bother duplicating; just return.
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPredQueued(NewSU, Pred);

  // Make sure the clone comes after the original. (InstrEmitter assumes
  // this ordering.)
  AddPredQueued(NewSU, SDep(SU, SDep::Artificial));

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPredQueued(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;
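  // CopyFromSU moves the value out of SrcRC into DestRC; CopyToSU moves it
  // back again. SU's scheduled successors are rewired to read the value from
  // CopyToSU.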

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPredQueued(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    } else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPredQueued(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPredQueued(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}

/// CheckForLiveRegDef - Update the interfering-register vector (LRegs) if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI,
                               const SDNode *Node = nullptr) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Likewise when the live def comes from the same glued node.
    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
      continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM ||
        Node->getOpcode() == ISD::INLINEASM_BR) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (Register::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs,
                                 TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (Node->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      if (Reg.isPhysical()) {
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
                           SrcNode);
      }
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.OpInfo[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
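    // Remove this entry with swap-and-pop; iterating from the back keeps the
    // remaining indices stable.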
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
  auto FindAvailableNode = [&]() {
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      LLVM_DEBUG(dbgs() << "    Interfering reg ";
                 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
                 else dbgs() << printReg(LRegs[0], TRI);
                 dbgs() << " SU #" << CurSU->NodeNum << '\n');
      std::pair<LRegsMapT::iterator, bool> LRegsPair =
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      if (LRegsPair.second) {
        CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
        Interferences.push_back(CurSU);
      } else {
        assert(CurSU->isPending && "Interferences are pending");
        // Update the interference with current live regs.
        LRegsPair.first->second = LRegs;
      }
      CurSU = AvailableQueue->pop();
    }
  };
  FindAvailableNode();
  if (CurSU)
    return CurSU;

  // We query the topological order in the loop body, so make sure outstanding
  // updates are applied before entering it (we only enter the loop if there
  // are some interferences). If we make changes to the ordering, we exit
  // the loop.

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (SUnit *TrySU : Interferences) {
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = nullptr;
    unsigned LiveCycle = std::numeric_limits<unsigned>::max();
    for (unsigned Reg : LRegs) {
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      // BacktrackBottomUp mutates Interferences!
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                        << ") to SU(" << TrySU->NodeNum << ")\n");
      AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available.
      if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
        LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
        CurSU = AvailableQueue->pop();
      } else {
        LLVM_DEBUG(dbgs() << "TrySU available\n");
        // Available and in AvailableQueue
        AvailableQueue->remove(TrySU);
        CurSU = TrySU;
      }
      FindAvailableNode();
      // Interferences has been mutated. We must break.
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value, but it requires cross register class copies
    // and is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = nullptr;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
      NewDef = Copies.back();
    }

    LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }
  assert(CurSU && "Unable to resolve live physical register dependencies!");
  return CurSU;
}
1607 
1608 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1609 /// schedulers.
1610 void ScheduleDAGRRList::ListScheduleBottomUp() {
1611  // Release any predecessors of the special Exit node.
1612  ReleasePredecessors(&ExitSU);
1613 
1614  // Add root to Available queue.
1615  if (!SUnits.empty()) {
1616  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1617  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1618  RootSU->isAvailable = true;
1619  AvailableQueue->push(RootSU);
1620  }
1621 
1622  // While the Available queue is not empty, grab the node with the highest
1623  // priority. If it is not ready, put it back. Schedule the node.
1624  Sequence.reserve(SUnits.size());
1625  while (!AvailableQueue->empty() || !Interferences.empty()) {
1626  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1627  AvailableQueue->dump(this));
1628 
1629  // Pick the best node to schedule taking all constraints into
1630  // consideration.
1631  SUnit *SU = PickNodeToScheduleBottomUp();
1632 
1633  AdvancePastStalls(SU);
1634 
1635  ScheduleNodeBottomUp(SU);
1636 
1637  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1638  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1639  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1640  "MinAvailableCycle uninitialized");
1641  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1642  }
1643  }
1644 
1645  // Reverse the order if it is bottom up.
1646  std::reverse(Sequence.begin(), Sequence.end());
1647 
1648 #ifndef NDEBUG
1649  VerifyScheduledSequence(/*isBottomUp=*/true);
1650 #endif
1651 }
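// Illustrative sketch (not part of this file): the core shape of the
// bottom-up list-scheduling loop above, reduced to a toy DAG. ToyNode and
// toyListScheduleBottomUp are hypothetical stand-ins for SUnit and
// ListScheduleBottomUp(); the toy simply picks the numerically largest
// Priority first, sidestepping the real comparator cascade, and omits
// cycle advancement, interferences, and hazard handling.

#include <algorithm>
#include <cstdio>
#include <queue>
#include <vector>

struct ToyNode {
  int Id = 0;
  int Priority = 0;           // toy convention: larger schedules earlier
  std::vector<int> Preds;     // operand edges, released once Id is scheduled
};

void toyListScheduleBottomUp(std::vector<ToyNode> &Nodes, int RootId) {
  std::vector<int> SuccsLeft(Nodes.size(), 0);
  for (const ToyNode &N : Nodes)
    for (int P : N.Preds)
      ++SuccsLeft[P];         // a pred is ready once all of its uses are done

  auto Less = [&](int L, int R) {
    return Nodes[L].Priority < Nodes[R].Priority;
  };
  std::priority_queue<int, std::vector<int>, decltype(Less)> Available(Less);
  Available.push(RootId);     // the graph root has no successors

  std::vector<int> Sequence;
  while (!Available.empty()) {
    int SU = Available.top();
    Available.pop();
    Sequence.push_back(SU);
    for (int P : Nodes[SU].Preds)
      if (--SuccsLeft[P] == 0)
        Available.push(P);    // ReleasePredecessors, in miniature
  }
  std::reverse(Sequence.begin(), Sequence.end()); // emit in top-down order
  for (int Id : Sequence)
    std::printf("SU(%d)\n", Id);
}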
1652 
1653 namespace {
1654 
1655 class RegReductionPQBase;
1656 
1657 struct queue_sort {
1658  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1659 };
1660 
1661 #ifndef NDEBUG
1662 template<class SF>
1663 struct reverse_sort : public queue_sort {
1664  SF &SortFunc;
1665 
1666  reverse_sort(SF &sf) : SortFunc(sf) {}
1667 
1668  bool operator()(SUnit* left, SUnit* right) const {
1669  // reverse left/right rather than simply !SortFunc(left, right)
1670  // to expose different paths in the comparison logic.
1671  return SortFunc(right, left);
1672  }
1673 };
1674 #endif // NDEBUG
1675 
1676 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1677 /// reduction scheduler.
1678 struct bu_ls_rr_sort : public queue_sort {
1679  enum {
1680  IsBottomUp = true,
1681  HasReadyFilter = false
1682  };
1683 
1684  RegReductionPQBase *SPQ;
1685 
1686  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1687 
1688  bool operator()(SUnit* left, SUnit* right) const;
1689 };
1690 
1691 // src_ls_rr_sort - Priority function for source order scheduler.
1692 struct src_ls_rr_sort : public queue_sort {
1693  enum {
1694  IsBottomUp = true,
1695  HasReadyFilter = false
1696  };
1697 
1698  RegReductionPQBase *SPQ;
1699 
1700  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1701 
1702  bool operator()(SUnit* left, SUnit* right) const;
1703 };
1704 
1705 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1706 struct hybrid_ls_rr_sort : public queue_sort {
1707  enum {
1708  IsBottomUp = true,
1709  HasReadyFilter = false
1710  };
1711 
1712  RegReductionPQBase *SPQ;
1713 
1714  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1715 
1716  bool isReady(SUnit *SU, unsigned CurCycle) const;
1717 
1718  bool operator()(SUnit* left, SUnit* right) const;
1719 };
1720 
1721 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1722 // scheduler.
1723 struct ilp_ls_rr_sort : public queue_sort {
1724  enum {
1725  IsBottomUp = true,
1726  HasReadyFilter = false
1727  };
1728 
1729  RegReductionPQBase *SPQ;
1730 
1731  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1732 
1733  bool isReady(SUnit *SU, unsigned CurCycle) const;
1734 
1735  bool operator()(SUnit* left, SUnit* right) const;
1736 };
1737 
1738 class RegReductionPQBase : public SchedulingPriorityQueue {
1739 protected:
1740  std::vector<SUnit *> Queue;
1741  unsigned CurQueueId = 0;
1742  bool TracksRegPressure;
1743  bool SrcOrder;
1744 
1745  // SUnits - The SUnits for the current graph.
1746  std::vector<SUnit> *SUnits;
1747 
1748  MachineFunction &MF;
1749  const TargetInstrInfo *TII;
1750  const TargetRegisterInfo *TRI;
1751  const TargetLowering *TLI;
1752  ScheduleDAGRRList *scheduleDAG = nullptr;
1753 
1754  // SethiUllmanNumbers - The SethiUllman number for each node.
1755  std::vector<unsigned> SethiUllmanNumbers;
1756 
1757  /// RegPressure - Tracking current reg pressure per register class.
1758  std::vector<unsigned> RegPressure;
1759 
1760  /// RegLimit - Tracking the number of allocatable registers per register
1761  /// class.
1762  std::vector<unsigned> RegLimit;
1763 
1764 public:
1765  RegReductionPQBase(MachineFunction &mf,
1766  bool hasReadyFilter,
1767  bool tracksrp,
1768  bool srcorder,
1769  const TargetInstrInfo *tii,
1770  const TargetRegisterInfo *tri,
1771  const TargetLowering *tli)
1772  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1773  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1774  if (TracksRegPressure) {
1775  unsigned NumRC = TRI->getNumRegClasses();
1776  RegLimit.resize(NumRC);
1777  RegPressure.resize(NumRC);
1778  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1779  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1780  for (const TargetRegisterClass *RC : TRI->regclasses())
1781  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1782  }
1783  }
1784 
1785  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1786  scheduleDAG = scheduleDag;
1787  }
1788 
1789  ScheduleHazardRecognizer* getHazardRec() {
1790  return scheduleDAG->getHazardRec();
1791  }
1792 
1793  void initNodes(std::vector<SUnit> &sunits) override;
1794 
1795  void addNode(const SUnit *SU) override;
1796 
1797  void updateNode(const SUnit *SU) override;
1798 
1799  void releaseState() override {
1800  SUnits = nullptr;
1801  SethiUllmanNumbers.clear();
1802  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1803  }
1804 
1805  unsigned getNodePriority(const SUnit *SU) const;
1806 
1807  unsigned getNodeOrdering(const SUnit *SU) const {
1808  if (!SU->getNode()) return 0;
1809 
1810  return SU->getNode()->getIROrder();
1811  }
1812 
1813  bool empty() const override { return Queue.empty(); }
1814 
1815  void push(SUnit *U) override {
1816  assert(!U->NodeQueueId && "Node in the queue already");
1817  U->NodeQueueId = ++CurQueueId;
1818  Queue.push_back(U);
1819  }
1820 
1821  void remove(SUnit *SU) override {
1822  assert(!Queue.empty() && "Queue is empty!");
1823  assert(SU->NodeQueueId != 0 && "Not in queue!");
1824  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1825  if (I != std::prev(Queue.end()))
1826  std::swap(*I, Queue.back());
1827  Queue.pop_back();
1828  SU->NodeQueueId = 0;
1829  }
1830 
1831  bool tracksRegPressure() const override { return TracksRegPressure; }
1832 
1833  void dumpRegPressure() const;
1834 
1835  bool HighRegPressure(const SUnit *SU) const;
1836 
1837  bool MayReduceRegPressure(SUnit *SU) const;
1838 
1839  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1840 
1841  void scheduledNode(SUnit *SU) override;
1842 
1843  void unscheduledNode(SUnit *SU) override;
1844 
1845 protected:
1846  bool canClobber(const SUnit *SU, const SUnit *Op);
1847  void AddPseudoTwoAddrDeps();
1848  void PrescheduleNodesWithMultipleUses();
1849  void CalculateSethiUllmanNumbers();
1850 };
1851 
1852 template<class SF>
1853 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1854  unsigned BestIdx = 0;
1855  // Only compute the cost for the first 1000 items in the queue, to avoid
1856  // excessive compile-times for very large queues.
1857  for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1858  I++)
1859  if (Picker(Q[BestIdx], Q[I]))
1860  BestIdx = I;
1861  SUnit *V = Q[BestIdx];
1862  if (BestIdx + 1 != Q.size())
1863  std::swap(Q[BestIdx], Q.back());
1864  Q.pop_back();
1865  return V;
1866 }
1867 
1868 template<class SF>
1869 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1870 #ifndef NDEBUG
1871  if (DAG->StressSched) {
1872  reverse_sort<SF> RPicker(Picker);
1873  return popFromQueueImpl(Q, RPicker);
1874  }
1875 #endif
1876  (void)DAG;
1877  return popFromQueueImpl(Q, Picker);
1878 }
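// A minimal sketch of the idiom shared by remove() and popFromQueueImpl()
// above: O(1) erasure from a std::vector by swapping the victim with the
// back, at the cost of element order. unorderedErase is a hypothetical
// helper, not an LLVM API.

#include <cstddef>
#include <utility>
#include <vector>

template <class T> void unorderedErase(std::vector<T> &V, std::size_t Idx) {
  if (Idx + 1 != V.size())
    std::swap(V[Idx], V.back()); // order is lost, but nothing shifts
  V.pop_back();
}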
1879 
1880 //===----------------------------------------------------------------------===//
1881 // RegReductionPriorityQueue Definition
1882 //===----------------------------------------------------------------------===//
1883 //
1884 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1885 // to reduce register pressure.
1886 //
1887 template<class SF>
1888 class RegReductionPriorityQueue : public RegReductionPQBase {
1889  SF Picker;
1890 
1891 public:
1892  RegReductionPriorityQueue(MachineFunction &mf,
1893  bool tracksrp,
1894  bool srcorder,
1895  const TargetInstrInfo *tii,
1896  const TargetRegisterInfo *tri,
1897  const TargetLowering *tli)
1898  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1899  tii, tri, tli),
1900  Picker(this) {}
1901 
1902  bool isBottomUp() const override { return SF::IsBottomUp; }
1903 
1904  bool isReady(SUnit *U) const override {
1905  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1906  }
1907 
1908  SUnit *pop() override {
1909  if (Queue.empty()) return nullptr;
1910 
1911  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1912  V->NodeQueueId = 0;
1913  return V;
1914  }
1915 
1916 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1917  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1918  // Emulate pop() without clobbering NodeQueueIds.
1919  std::vector<SUnit *> DumpQueue = Queue;
1920  SF DumpPicker = Picker;
1921  while (!DumpQueue.empty()) {
1922  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1923  dbgs() << "Height " << SU->getHeight() << ": ";
1924  DAG->dumpNode(*SU);
1925  }
1926  }
1927 #endif
1928 };
1929 
1930 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1931 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1932 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1933 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1934 
1935 } // end anonymous namespace
1936 
1937 //===----------------------------------------------------------------------===//
1938 // Static Node Priority for Register Pressure Reduction
1939 //===----------------------------------------------------------------------===//
1940 
1941 // Check for special nodes that bypass scheduling heuristics.
1942 // Currently this pushes TokenFactor nodes down, but may be used for other
1943 // pseudo-ops as well.
1944 //
1945 // Return -1 to schedule right above left, 1 for left above right.
1946 // Return 0 if no bias exists.
1947 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1948  bool LSchedLow = left->isScheduleLow;
1949  bool RSchedLow = right->isScheduleLow;
1950  if (LSchedLow != RSchedLow)
1951  return LSchedLow < RSchedLow ? 1 : -1;
1952  return 0;
1953 }
1954 
1955 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1956 /// Smaller number is the higher priority.
1957 static unsigned
1958 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1959  if (SUNumbers[SU->NodeNum] != 0)
1960  return SUNumbers[SU->NodeNum];
1961 
1962  // Use WorkList to avoid stack overflow on excessively large IRs.
1963  struct WorkState {
1964  WorkState(const SUnit *SU) : SU(SU) {}
1965  const SUnit *SU;
1966  unsigned PredsProcessed = 0;
1967  };
1968 
1969  SmallVector<WorkState, 16> WorkList;
1970  WorkList.push_back(SU);
1971  while (!WorkList.empty()) {
1972  auto &Temp = WorkList.back();
1973  auto *TempSU = Temp.SU;
1974  bool AllPredsKnown = true;
1975  // Try to find a non-evaluated pred and push it into the processing stack.
1976  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1977  auto &Pred = TempSU->Preds[P];
1978  if (Pred.isCtrl()) continue; // ignore chain preds
1979  SUnit *PredSU = Pred.getSUnit();
1980  if (SUNumbers[PredSU->NodeNum] == 0) {
1981 #ifndef NDEBUG
1982  // In debug mode, check that we don't have such an element in the stack.
1983  for (auto It : WorkList)
1984  assert(It.SU != PredSU && "Trying to push an element twice?");
1985 #endif
1986  // Next time start processing this one starting from the next pred.
1987  Temp.PredsProcessed = P + 1;
1988  WorkList.push_back(PredSU);
1989  AllPredsKnown = false;
1990  break;
1991  }
1992  }
1993 
1994  if (!AllPredsKnown)
1995  continue;
1996 
1997  // Once all preds are known, we can calculate the answer for this one.
1998  unsigned SethiUllmanNumber = 0;
1999  unsigned Extra = 0;
2000  for (const SDep &Pred : TempSU->Preds) {
2001  if (Pred.isCtrl()) continue; // ignore chain preds
2002  SUnit *PredSU = Pred.getSUnit();
2003  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
2004  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2005  if (PredSethiUllman > SethiUllmanNumber) {
2006  SethiUllmanNumber = PredSethiUllman;
2007  Extra = 0;
2008  } else if (PredSethiUllman == SethiUllmanNumber)
2009  ++Extra;
2010  }
2011 
2012  SethiUllmanNumber += Extra;
2013  if (SethiUllmanNumber == 0)
2014  SethiUllmanNumber = 1;
2015  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2016  WorkList.pop_back();
2017  }
2018 
2019  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2020  return SUNumbers[SU->NodeNum];
2021 }
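// For intuition, the same recurrence written recursively (a sketch; the
// function above deliberately uses an explicit work list so that very deep
// DAGs cannot overflow the native stack). ToySU and toySethiUllman are
// hypothetical stand-ins. For data preds with numbers p1 >= p2 >= ..., the
// result is p1 plus one for each additional pred tying with p1; leaves get 1.

#include <vector>

struct ToySU { std::vector<const ToySU *> DataPreds; };

static unsigned toySethiUllman(const ToySU *SU) {
  unsigned Num = 0, Extra = 0;
  for (const ToySU *Pred : SU->DataPreds) {
    unsigned P = toySethiUllman(Pred);
    if (P > Num) {
      Num = P;
      Extra = 0;
    } else if (P == Num) {
      ++Extra; // a tie needs one more register to hold both subtrees
    }
  }
  Num += Extra;
  return Num == 0 ? 1 : Num; // leaves
}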
2022 
2023 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2024 /// scheduling units.
2025 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2026  SethiUllmanNumbers.assign(SUnits->size(), 0);
2027 
2028  for (const SUnit &SU : *SUnits)
2029  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2030 }
2031 
2032 void RegReductionPQBase::addNode(const SUnit *SU) {
2033  unsigned SUSize = SethiUllmanNumbers.size();
2034  if (SUnits->size() > SUSize)
2035  SethiUllmanNumbers.resize(SUSize*2, 0);
2036  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2037 }
2038 
2039 void RegReductionPQBase::updateNode(const SUnit *SU) {
2040  SethiUllmanNumbers[SU->NodeNum] = 0;
2041  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2042 }
2043 
2044 // Lower priority means schedule further down. For bottom-up scheduling, lower
2045 // priority SUs are scheduled before higher priority SUs.
2046 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2047  assert(SU->NodeNum < SethiUllmanNumbers.size());
2048  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2049  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2050  // CopyToReg should be close to its uses to facilitate coalescing and
2051  // avoid spilling.
2052  return 0;
2053  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2054  Opc == TargetOpcode::SUBREG_TO_REG ||
2055  Opc == TargetOpcode::INSERT_SUBREG)
2056  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2057  // close to their uses to facilitate coalescing.
2058  return 0;
2059  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2060  // If SU does not have a register use, i.e. it doesn't produce a value
2061  // that would be consumed (e.g. store), then it terminates a chain of
2062  // computation. Give it a large SethiUllman number so it will be
2063  // scheduled right before its predecessors, so that it doesn't lengthen
2064  // their live ranges.
2065  return 0xffff;
2066  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2067  // If SU does not have a register def, schedule it close to its uses
2068  // because it does not lengthen any live ranges.
2069  return 0;
2070 #if 1
2071  return SethiUllmanNumbers[SU->NodeNum];
2072 #else
2073  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2074  if (SU->isCallOp) {
2075  // FIXME: This assumes all of the defs are used as call operands.
2076  int NP = (int)Priority - SU->getNode()->getNumValues();
2077  return (NP > 0) ? NP : 0;
2078  }
2079  return Priority;
2080 #endif
2081 }
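// Summary of the cases above (illustrative):
//   TokenFactor, CopyToReg, *_SUBREG, SUBREG_TO_REG -> 0      (stay near uses)
//   data preds but no data succs (e.g. a store)     -> 0xffff (chain terminator)
//   data succs but no data preds (pure def)         -> 0      (stay near uses)
//   everything else                                 -> its Sethi-Ullman number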
2082 
2083 //===----------------------------------------------------------------------===//
2084 // Register Pressure Tracking
2085 //===----------------------------------------------------------------------===//
2086 
2087 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2088 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2089  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2090  unsigned Id = RC->getID();
2091  unsigned RP = RegPressure[Id];
2092  if (!RP) continue;
2093  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2094  << RegLimit[Id] << '\n');
2095  }
2096 }
2097 #endif
2098 
2099 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2100  if (!TLI)
2101  return false;
2102 
2103  for (const SDep &Pred : SU->Preds) {
2104  if (Pred.isCtrl())
2105  continue;
2106  SUnit *PredSU = Pred.getSUnit();
2107  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2108  // to cover the number of registers defined (they are all live).
2109  if (PredSU->NumRegDefsLeft == 0) {
2110  continue;
2111  }
2112  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2113  RegDefPos.IsValid(); RegDefPos.Advance()) {
2114  unsigned RCId, Cost;
2115  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2116 
2117  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2118  return true;
2119  }
2120  }
2121  return false;
2122 }
2123 
2124 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2125  const SDNode *N = SU->getNode();
2126 
2127  if (!N->isMachineOpcode() || !SU->NumSuccs)
2128  return false;
2129 
2130  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2131  for (unsigned i = 0; i != NumDefs; ++i) {
2132  MVT VT = N->getSimpleValueType(i);
2133  if (!N->hasAnyUseOfValue(i))
2134  continue;
2135  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2136  if (RegPressure[RCId] >= RegLimit[RCId])
2137  return true;
2138  }
2139  return false;
2140 }
2141 
2142 // Compute the register pressure contribution of this instruction by counting
2143 // up for uses that are not live and down for defs. Only count register classes
2144 // that are already under high pressure. As a side effect, compute the number of
2145 // uses of registers that are already live.
2146 //
2147 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2148 // so could probably be factored.
2149 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2150  LiveUses = 0;
2151  int PDiff = 0;
2152  for (const SDep &Pred : SU->Preds) {
2153  if (Pred.isCtrl())
2154  continue;
2155  SUnit *PredSU = Pred.getSUnit();
2156  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2157  // to cover the number of registers defined (they are all live).
2158  if (PredSU->NumRegDefsLeft == 0) {
2159  if (PredSU->getNode()->isMachineOpcode())
2160  ++LiveUses;
2161  continue;
2162  }
2163  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2164  RegDefPos.IsValid(); RegDefPos.Advance()) {
2165  MVT VT = RegDefPos.GetValue();
2166  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2167  if (RegPressure[RCId] >= RegLimit[RCId])
2168  ++PDiff;
2169  }
2170  }
2171  const SDNode *N = SU->getNode();
2172 
2173  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2174  return PDiff;
2175 
2176  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2177  for (unsigned i = 0; i != NumDefs; ++i) {
2178  MVT VT = N->getSimpleValueType(i);
2179  if (!N->hasAnyUseOfValue(i))
2180  continue;
2181  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2182  if (RegPressure[RCId] >= RegLimit[RCId])
2183  --PDiff;
2184  }
2185  return PDiff;
2186 }
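// Worked example of the sign convention above (hypothetical numbers): if SU
// has two data uses whose register classes are already at their limit (+1
// each) and one still-used def in such a class (-1), then PDiff = +1;
// scheduling SU bottom-up is expected to shrink the set of live registers,
// which is why sched=list-ilp prefers the larger PDiff first.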
2187 
2188 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2189  if (!TracksRegPressure)
2190  return;
2191 
2192  if (!SU->getNode())
2193  return;
2194 
2195  for (const SDep &Pred : SU->Preds) {
2196  if (Pred.isCtrl())
2197  continue;
2198  SUnit *PredSU = Pred.getSUnit();
2199  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2200  // to cover the number of registers defined (they are all live).
2201  if (PredSU->NumRegDefsLeft == 0) {
2202  continue;
2203  }
2204  // FIXME: The ScheduleDAG currently loses information about which of a
2205  // node's values is consumed by each dependence. Consequently, if the node
2206  // defines multiple register classes, we don't know which to pressurize
2207  // here. Instead the following loop consumes the register defs in an
2208  // arbitrary order. At least it handles the common case of clustered loads
2209  // to the same class. For precise liveness, each SDep needs to indicate the
2210  // result number. But that tightly couples the ScheduleDAG with the
2211  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2212  // value type or register class to SDep.
2213  //
2214  // The most important aspect of register tracking is balancing the increase
2215  // here with the reduction further below. Note that this SU may use multiple
2216  // defs in PredSU. They can't be determined here, but we've already
2217  // compensated by reducing NumRegDefsLeft in PredSU during
2218  // ScheduleDAGSDNodes::AddSchedEdges.
2219  --PredSU->NumRegDefsLeft;
2220  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2221  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2222  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2223  if (SkipRegDefs)
2224  continue;
2225 
2226  unsigned RCId, Cost;
2227  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2228  RegPressure[RCId] += Cost;
2229  break;
2230  }
2231  }
2232 
2233  // We should have this assert, but there may be dead SDNodes that never
2234  // materialize as SUnits, so they don't appear to generate liveness.
2235  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2236  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2237  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2238  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2239  if (SkipRegDefs > 0)
2240  continue;
2241  unsigned RCId, Cost;
2242  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2243  if (RegPressure[RCId] < Cost) {
2244  // Register pressure tracking is imprecise. This can happen. But we try
2245  // hard not to let it happen because it likely results in poor scheduling.
2246  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2247  << ") has too many regdefs\n");
2248  RegPressure[RCId] = 0;
2249  }
2250  else {
2251  RegPressure[RCId] -= Cost;
2252  }
2253  }
2254  LLVM_DEBUG(dumpRegPressure());
2255 }
2256 
2257 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2258  if (!TracksRegPressure)
2259  return;
2260 
2261  const SDNode *N = SU->getNode();
2262  if (!N) return;
2263 
2264  if (!N->isMachineOpcode()) {
2265  if (N->getOpcode() != ISD::CopyToReg)
2266  return;
2267  } else {
2268  unsigned Opc = N->getMachineOpcode();
2269  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2270  Opc == TargetOpcode::INSERT_SUBREG ||
2271  Opc == TargetOpcode::SUBREG_TO_REG ||
2272  Opc == TargetOpcode::REG_SEQUENCE ||
2273  Opc == TargetOpcode::IMPLICIT_DEF)
2274  return;
2275  }
2276 
2277  for (const SDep &Pred : SU->Preds) {
2278  if (Pred.isCtrl())
2279  continue;
2280  SUnit *PredSU = Pred.getSUnit();
2281  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2282  // counts data deps.
2283  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2284  continue;
2285  const SDNode *PN = PredSU->getNode();
2286  if (!PN->isMachineOpcode()) {
2287  if (PN->getOpcode() == ISD::CopyFromReg) {
2288  MVT VT = PN->getSimpleValueType(0);
2289  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2290  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2291  }
2292  continue;
2293  }
2294  unsigned POpc = PN->getMachineOpcode();
2295  if (POpc == TargetOpcode::IMPLICIT_DEF)
2296  continue;
2297  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2298  POpc == TargetOpcode::INSERT_SUBREG ||
2299  POpc == TargetOpcode::SUBREG_TO_REG) {
2300  MVT VT = PN->getSimpleValueType(0);
2301  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2302  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2303  continue;
2304  }
2305  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2306  for (unsigned i = 0; i != NumDefs; ++i) {
2307  MVT VT = PN->getSimpleValueType(i);
2308  if (!PN->hasAnyUseOfValue(i))
2309  continue;
2310  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2311  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2312  // Register pressure tracking is imprecise. This can happen.
2313  RegPressure[RCId] = 0;
2314  else
2315  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2316  }
2317  }
2318 
2319  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2320  // may transfer data dependencies to CopyToReg.
2321  if (SU->NumSuccs && N->isMachineOpcode()) {
2322  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2323  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2324  MVT VT = N->getSimpleValueType(i);
2325  if (VT == MVT::Glue || VT == MVT::Other)
2326  continue;
2327  if (!N->hasAnyUseOfValue(i))
2328  continue;
2329  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2330  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2331  }
2332  }
2333 
2334  LLVM_DEBUG(dumpRegPressure());
2335 }
2336 
2337 //===----------------------------------------------------------------------===//
2338 // Dynamic Node Priority for Register Pressure Reduction
2339 //===----------------------------------------------------------------------===//
2340 
2341 /// closestSucc - Returns the scheduled cycle of the successor which is
2342 /// closest to the current cycle.
2343 static unsigned closestSucc(const SUnit *SU) {
2344  unsigned MaxHeight = 0;
2345  for (const SDep &Succ : SU->Succs) {
2346  if (Succ.isCtrl()) continue; // ignore chain succs
2347  unsigned Height = Succ.getSUnit()->getHeight();
2348  // If there are a bunch of CopyToRegs stacked up, they should be considered
2349  // to be at the same position.
2350  if (Succ.getSUnit()->getNode() &&
2351  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2352  Height = closestSucc(Succ.getSUnit())+1;
2353  if (Height > MaxHeight)
2354  MaxHeight = Height;
2355  }
2356  return MaxHeight;
2357 }
2358 
2359 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2360 /// for scratch registers, i.e. number of data dependencies.
2361 static unsigned calcMaxScratches(const SUnit *SU) {
2362  unsigned Scratches = 0;
2363  for (const SDep &Pred : SU->Preds) {
2364  if (Pred.isCtrl()) continue; // ignore chain preds
2365  Scratches++;
2366  }
2367  return Scratches;
2368 }
2369 
2370 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2371 /// CopyFromReg from a virtual register.
2372 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2373  bool RetVal = false;
2374  for (const SDep &Pred : SU->Preds) {
2375  if (Pred.isCtrl()) continue;
2376  const SUnit *PredSU = Pred.getSUnit();
2377  if (PredSU->getNode() &&
2378  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2379  unsigned Reg =
2380  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2381  if (Register::isVirtualRegister(Reg)) {
2382  RetVal = true;
2383  continue;
2384  }
2385  }
2386  return false;
2387  }
2388  return RetVal;
2389 }
2390 
2391 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2392 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2393 /// it has no other use. It should be scheduled closer to the terminator.
2394 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2395  bool RetVal = false;
2396  for (const SDep &Succ : SU->Succs) {
2397  if (Succ.isCtrl()) continue;
2398  const SUnit *SuccSU = Succ.getSUnit();
2399  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2400  unsigned Reg =
2401  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2402  if (Register::isVirtualRegister(Reg)) {
2403  RetVal = true;
2404  continue;
2405  }
2406  }
2407  return false;
2408  }
2409  return RetVal;
2410 }
2411 
2412 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2413 // set isVRegCycle for its CopyFromReg operands.
2414 //
2415 // This is only relevant for single-block loops, in which case the VRegCycle
2416 // node is likely an induction variable in which the operand and target virtual
2417 // registers should be coalesced (e.g. pre/post increment values). Setting the
2418 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2419 // CopyFromReg so that this node becomes the virtual register "kill". This
2420 // avoids interference between the values live in and out of the block and
2421 // eliminates a copy inside the loop.
2422 static void initVRegCycle(SUnit *SU) {
2423  if (DisableSchedVRCycle)
2424  return;
2425 
2426  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2427  return;
2428 
2429  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2430 
2431  SU->isVRegCycle = true;
2432 
2433  for (const SDep &Pred : SU->Preds) {
2434  if (Pred.isCtrl()) continue;
2435  Pred.getSUnit()->isVRegCycle = true;
2436  }
2437 }
2438 
2439 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2440 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2441 static void resetVRegCycle(SUnit *SU) {
2442  if (!SU->isVRegCycle)
2443  return;
2444 
2445  for (const SDep &Pred : SU->Preds) {
2446  if (Pred.isCtrl()) continue; // ignore chain preds
2447  SUnit *PredSU = Pred.getSUnit();
2448  if (PredSU->isVRegCycle) {
2449  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2450  "VRegCycle def must be CopyFromReg");
2451  Pred.getSUnit()->isVRegCycle = false;
2452  }
2453  }
2454 }
2455 
2456 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2457 // means a node that defines the VRegCycle has not been scheduled yet.
2458 static bool hasVRegCycleUse(const SUnit *SU) {
2459  // If this SU also defines the VReg, don't hoist it as a "use".
2460  if (SU->isVRegCycle)
2461  return false;
2462 
2463  for (const SDep &Pred : SU->Preds) {
2464  if (Pred.isCtrl()) continue; // ignore chain preds
2465  if (Pred.getSUnit()->isVRegCycle &&
2466  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2467  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2468  return true;
2469  }
2470  }
2471  return false;
2472 }
2473 
2474 // Check for either a dependence (latency) or resource (hazard) stall.
2475 //
2476 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2477 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2478  if ((int)SPQ->getCurCycle() < Height) return true;
2479  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2480  != ScheduleHazardRecognizer::NoHazard)
2481  return true;
2482  return false;
2483 }
2484 
2485 // Return -1 if left has higher priority, 1 if right has higher priority.
2486 // Return 0 if latency-based priority is equivalent.
2487 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2488  RegReductionPQBase *SPQ) {
2489  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2490  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2491  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2492  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2493  int LHeight = (int)left->getHeight() + LPenalty;
2494  int RHeight = (int)right->getHeight() + RPenalty;
2495 
2496  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2497  BUHasStall(left, LHeight, SPQ);
2498  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2499  BUHasStall(right, RHeight, SPQ);
2500 
2501  // If scheduling only one of the nodes will cause a pipeline stall, delay it.
2502  // If scheduling both of them will cause pipeline stalls, sort
2503  // them according to their height.
2504  if (LStall) {
2505  if (!RStall)
2506  return 1;
2507  if (LHeight != RHeight)
2508  return LHeight > RHeight ? 1 : -1;
2509  } else if (RStall)
2510  return -1;
2511 
2512  // If either node is scheduling for latency, sort them by height/depth
2513  // and latency.
2514  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2515  right->SchedulingPref == Sched::ILP)) {
2516  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2517  // is enabled, grouping instructions by cycle, then their height is already
2518  // covered so only their depth matters. We also reach this point if both stall
2519  // but have the same height.
2520  if (!SPQ->getHazardRec()->isEnabled()) {
2521  if (LHeight != RHeight)
2522  return LHeight > RHeight ? 1 : -1;
2523  }
2524  int LDepth = left->getDepth() - LPenalty;
2525  int RDepth = right->getDepth() - RPenalty;
2526  if (LDepth != RDepth) {
2527  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2528  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2529  << ") depth " << RDepth << "\n");
2530  return LDepth < RDepth ? 1 : -1;
2531  }
2532  if (left->Latency != right->Latency)
2533  return left->Latency > right->Latency ? 1 : -1;
2534  }
2535  return 0;
2536 }
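// Decision sketch for the stall logic above (-1 prefers left, 1 right):
//   LStall && !RStall          -> 1  (delay the stalling left node)
//   !LStall && RStall          -> -1 (delay the stalling right node)
//   both stall, heights differ -> prefer the smaller height, i.e. the node
//                                 that becomes ready sooner goes first
//   otherwise                  -> fall through to the height/depth/latency
//                                 comparisons below the stall checks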
2537 
2538 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2539  // Schedule physical register definitions close to their use. This is
2540  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2541  // long as shortening physreg live ranges is generally good, we can defer
2542  // creating a subtarget hook.
2543  if (!DisableSchedPhysRegJoin) {
2544  bool LHasPhysReg = left->hasPhysRegDefs;
2545  bool RHasPhysReg = right->hasPhysRegDefs;
2546  if (LHasPhysReg != RHasPhysReg) {
2547  #ifndef NDEBUG
2548  static const char *const PhysRegMsg[] = { " has no physreg",
2549  " defines a physreg" };
2550  #endif
2551  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2552  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2553  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2554  return LHasPhysReg < RHasPhysReg;
2555  }
2556  }
2557 
2558  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2559  unsigned LPriority = SPQ->getNodePriority(left);
2560  unsigned RPriority = SPQ->getNodePriority(right);
2561 
2562  // Be really careful about hoisting call operands above previous calls.
2563  // Only allow it if it would reduce register pressure.
2564  if (left->isCall && right->isCallOp) {
2565  unsigned RNumVals = right->getNode()->getNumValues();
2566  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2567  }
2568  if (right->isCall && left->isCallOp) {
2569  unsigned LNumVals = left->getNode()->getNumValues();
2570  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2571  }
2572 
2573  if (LPriority != RPriority)
2574  return LPriority > RPriority;
2575 
2576  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2577  // the same, then keep source order.
2578  if (left->isCall || right->isCall) {
2579  unsigned LOrder = SPQ->getNodeOrdering(left);
2580  unsigned ROrder = SPQ->getNodeOrdering(right);
2581 
2582  // Prefer an ordering where the lower the non-zero order number, the higher
2583  // the preference.
2584  if ((LOrder || ROrder) && LOrder != ROrder)
2585  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2586  }
2587 
2588  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2589  // e.g.
2590  // t1 = op t2, c1
2591  // t3 = op t4, c2
2592  //
2593  // and the following instructions are both ready.
2594  // t2 = op c3
2595  // t4 = op c4
2596  //
2597  // Then schedule t2 = op first.
2598  // i.e.
2599  // t4 = op c4
2600  // t2 = op c3
2601  // t1 = op t2, c1
2602  // t3 = op t4, c2
2603  //
2604  // This creates more short live intervals.
2605  unsigned LDist = closestSucc(left);
2606  unsigned RDist = closestSucc(right);
2607  if (LDist != RDist)
2608  return LDist < RDist;
2609 
2610  // How many registers become live when the node is scheduled.
2611  unsigned LScratch = calcMaxScratches(left);
2612  unsigned RScratch = calcMaxScratches(right);
2613  if (LScratch != RScratch)
2614  return LScratch > RScratch;
2615 
2616  // Comparing latency against a call makes little sense unless the node
2617  // is register pressure-neutral.
2618  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2619  return (left->NodeQueueId > right->NodeQueueId);
2620 
2621  // Do not compare latencies when one or both of the nodes are calls.
2622  if (!DisableSchedCycles &&
2623  !(left->isCall || right->isCall)) {
2624  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2625  if (result != 0)
2626  return result > 0;
2627  }
2628  else {
2629  if (left->getHeight() != right->getHeight())
2630  return left->getHeight() > right->getHeight();
2631 
2632  if (left->getDepth() != right->getDepth())
2633  return left->getDepth() < right->getDepth();
2634  }
2635 
2636  assert(left->NodeQueueId && right->NodeQueueId &&
2637  "NodeQueueId cannot be zero");
2638  return (left->NodeQueueId > right->NodeQueueId);
2639 }
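// The function above is a cascade of tie-breakers. The same pattern in
// miniature (a sketch; ToyKey and toyLess are hypothetical): each heuristic
// decides only when it can tell the two nodes apart, and the final
// comparison against a monotonically assigned queue id keeps the ordering
// total and deterministic, exactly like the NodeQueueId fallback above.

struct ToyKey { unsigned Priority, Dist, QueueId; };

static bool toyLess(const ToyKey &L, const ToyKey &R) {
  if (L.Priority != R.Priority)
    return L.Priority > R.Priority; // mirrors "LPriority > RPriority"
  if (L.Dist != R.Dist)
    return L.Dist < R.Dist;         // mirrors the closestSucc comparison
  return L.QueueId > R.QueueId;     // FIFO tie-break; never reports "equal"
}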
2640 
2641 // Bottom up
2642 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2643  if (int res = checkSpecialNodes(left, right))
2644  return res > 0;
2645 
2646  return BURRSort(left, right, SPQ);
2647 }
2648 
2649 // Source order, otherwise bottom up.
2650 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2651  if (int res = checkSpecialNodes(left, right))
2652  return res > 0;
2653 
2654  unsigned LOrder = SPQ->getNodeOrdering(left);
2655  unsigned ROrder = SPQ->getNodeOrdering(right);
2656 
2657  // Prefer an ordering where the lower the non-zero order number, the higher
2658  // the preference.
2659  if ((LOrder || ROrder) && LOrder != ROrder)
2660  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2661 
2662  return BURRSort(left, right, SPQ);
2663 }
2664 
2665 // If the time between now and when the instruction will be ready can cover
2666 // the spill code, then avoid adding it to the ready queue. This gives long
2667 // stalls highest priority and allows hoisting across calls. It should also
2668 // speed up processing the available queue.
2669 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2670  static const unsigned ReadyDelay = 3;
2671 
2672  if (SPQ->MayReduceRegPressure(SU)) return true;
2673 
2674  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2675 
2676  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2677  != ScheduleHazardRecognizer::NoHazard)
2678  return false;
2679 
2680  return true;
2681 }
2682 
2683 // Return true if right should be scheduled with higher priority than left.
2684 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2685  if (int res = checkSpecialNodes(left, right))
2686  return res > 0;
2687 
2688  if (left->isCall || right->isCall)
2689  // No way to compute latency of calls.
2690  return BURRSort(left, right, SPQ);
2691 
2692  bool LHigh = SPQ->HighRegPressure(left);
2693  bool RHigh = SPQ->HighRegPressure(right);
2694  // Avoid causing spills. If register pressure is high, schedule for
2695  // register pressure reduction.
2696  if (LHigh && !RHigh) {
2697  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2698  << right->NodeNum << ")\n");
2699  return true;
2700  }
2701  else if (!LHigh && RHigh) {
2702  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2703  << left->NodeNum << ")\n");
2704  return false;
2705  }
2706  if (!LHigh && !RHigh) {
2707  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2708  if (result != 0)
2709  return result > 0;
2710  }
2711  return BURRSort(left, right, SPQ);
2712 }
2713 
2714 // Schedule as many instructions in each cycle as possible. So don't make an
2715 // instruction available unless it is ready in the current cycle.
2716 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2717  if (SU->getHeight() > CurCycle) return false;
2718 
2719  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2720  != ScheduleHazardRecognizer::NoHazard)
2721  return false;
2722 
2723  return true;
2724 }
2725 
2726 static bool canEnableCoalescing(SUnit *SU) {
2727  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2728  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2729  // CopyToReg should be close to its uses to facilitate coalescing and
2730  // avoid spilling.
2731  return true;
2732 
2733  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2734  Opc == TargetOpcode::SUBREG_TO_REG ||
2735  Opc == TargetOpcode::INSERT_SUBREG)
2736  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2737  // close to their uses to facilitate coalescing.
2738  return true;
2739 
2740  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2741  // If SU does not have a register def, schedule it close to its uses
2742  // because it does not lengthen any live ranges.
2743  return true;
2744 
2745  return false;
2746 }
2747 
2748 // list-ilp is currently an experimental scheduler that allows various
2749 // heuristics to be enabled prior to the normal register reduction logic.
2750 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2751  if (int res = checkSpecialNodes(left, right))
2752  return res > 0;
2753 
2754  if (left->isCall || right->isCall)
2755  // No way to compute latency of calls.
2756  return BURRSort(left, right, SPQ);
2757 
2758  unsigned LLiveUses = 0, RLiveUses = 0;
2759  int LPDiff = 0, RPDiff = 0;
2760  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2761  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2762  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2763  }
2764  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2765  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2766  << "): " << LPDiff << " != SU(" << right->NodeNum
2767  << "): " << RPDiff << "\n");
2768  return LPDiff > RPDiff;
2769  }
2770 
2771  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2772  bool LReduce = canEnableCoalescing(left);
2773  bool RReduce = canEnableCoalescing(right);
2774  if (LReduce && !RReduce) return false;
2775  if (RReduce && !LReduce) return true;
2776  }
2777 
2778  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2779  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2780  << " != SU(" << right->NodeNum << "): " << RLiveUses
2781  << "\n");
2782  return LLiveUses < RLiveUses;
2783  }
2784 
2785  if (!DisableSchedStalls) {
2786  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2787  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2788  if (LStall != RStall)
2789  return left->getHeight() > right->getHeight();
2790  }
2791 
2792  if (!DisableSchedCriticalPath) {
2793  int spread = (int)left->getDepth() - (int)right->getDepth();
2794  if (std::abs(spread) > MaxReorderWindow) {
2795  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2796  << left->getDepth() << " != SU(" << right->NodeNum
2797  << "): " << right->getDepth() << "\n");
2798  return left->getDepth() < right->getDepth();
2799  }
2800  }
2801 
2802  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2803  int spread = (int)left->getHeight() - (int)right->getHeight();
2804  if (std::abs(spread) > MaxReorderWindow)
2805  return left->getHeight() > right->getHeight();
2806  }
2807 
2808  return BURRSort(left, right, SPQ);
2809 }
2810 
2811 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2812  SUnits = &sunits;
2813  // Add pseudo dependency edges for two-address nodes.
2814  if (!Disable2AddrHack)
2815  AddPseudoTwoAddrDeps();
2816  // Reroute edges to nodes with multiple uses.
2817  if (!TracksRegPressure && !SrcOrder)
2818  PrescheduleNodesWithMultipleUses();
2819  // Calculate node priorities.
2820  CalculateSethiUllmanNumbers();
2821 
2822  // For single block loops, mark nodes that look like canonical IV increments.
2823  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2824  for (SUnit &SU : sunits)
2825  initVRegCycle(&SU);
2826 }
2827 
2828 //===----------------------------------------------------------------------===//
2829 // Preschedule for Register Pressure
2830 //===----------------------------------------------------------------------===//
2831 
2832 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2833  if (SU->isTwoAddress) {
2834  unsigned Opc = SU->getNode()->getMachineOpcode();
2835  const MCInstrDesc &MCID = TII->get(Opc);
2836  unsigned NumRes = MCID.getNumDefs();
2837  unsigned NumOps = MCID.getNumOperands() - NumRes;
2838  for (unsigned i = 0; i != NumOps; ++i) {
2839  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2840  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2841  if (DU->getNodeId() != -1 &&
2842  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2843  return true;
2844  }
2845  }
2846  }
2847  return false;
2848 }
2849 
2850 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2851 /// successors' explicit physregs whose definition can reach DepSU.
2852 /// i.e. DepSU should not be scheduled above SU.
2853 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2854  ScheduleDAGRRList *scheduleDAG,
2855  const TargetInstrInfo *TII,
2856  const TargetRegisterInfo *TRI) {
2857  const MCPhysReg *ImpDefs
2858  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2859  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2860  if(!ImpDefs && !RegMask)
2861  return false;
2862 
2863  for (const SDep &Succ : SU->Succs) {
2864  SUnit *SuccSU = Succ.getSUnit();
2865  for (const SDep &SuccPred : SuccSU->Preds) {
2866  if (!SuccPred.isAssignedRegDep())
2867  continue;
2868 
2869  if (RegMask &&
2870  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2871  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2872  return true;
2873 
2874  if (ImpDefs)
2875  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2876  // Return true if SU clobbers this physical register use and the
2877  // definition of the register is reachable from DepSU. IsReachable queries
2878  // a topological forward sort of the DAG (following the successors).
2879  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2880  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2881  return true;
2882  }
2883  }
2884  return false;
2885 }
2886 
2887 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2888 /// physical register defs.
2889 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2890  const TargetInstrInfo *TII,
2891  const TargetRegisterInfo *TRI) {
2892  SDNode *N = SuccSU->getNode();
2893  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2894  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2895  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2896  for (const SDNode *SUNode = SU->getNode(); SUNode;
2897  SUNode = SUNode->getGluedNode()) {
2898  if (!SUNode->isMachineOpcode())
2899  continue;
2900  const MCPhysReg *SUImpDefs =
2901  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2902  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2903  if (!SUImpDefs && !SURegMask)
2904  continue;
2905  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2906  MVT VT = N->getSimpleValueType(i);
2907  if (VT == MVT::Glue || VT == MVT::Other)
2908  continue;
2909  if (!N->hasAnyUseOfValue(i))
2910  continue;
2911  unsigned Reg = ImpDefs[i - NumDefs];
2912  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2913  return true;
2914  if (!SUImpDefs)
2915  continue;
2916  for (;*SUImpDefs; ++SUImpDefs) {
2917  unsigned SUReg = *SUImpDefs;
2918  if (TRI->regsOverlap(Reg, SUReg))
2919  return true;
2920  }
2921  }
2922  }
2923  return false;
2924 }
2925 
2926 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2927 /// are not handled well by the general register pressure reduction
2928 /// heuristics. When presented with code like this:
2929 ///
2930 ///      N
2931 ///    / |
2932 ///   /  |
2933 ///  U  store
2934 ///  |
2935 /// ...
2936 ///
2937 /// the heuristics tend to push the store up, but since the
2938 /// operand of the store has another use (U), this would increase
2939 /// the length of that other use (the U->N edge).
2940 ///
2941 /// This function transforms code like the above to route U's
2942 /// dependence through the store when possible, like this:
2943 ///
2944 ///      N
2945 ///      ||
2946 ///      ||
2947 ///     store
2948 ///       |
2949 ///       U
2950 ///       |
2951 ///      ...
2952 ///
2953 /// This results in the store being scheduled immediately
2954 /// after N, which shortens the U->N live range, reducing
2955 /// register pressure.
2956 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2957  // Visit all the nodes in topological order, working top-down.
2958  for (SUnit &SU : *SUnits) {
2959  // For now, only look at nodes with no data successors, such as stores.
2960  // These are especially important, due to the heuristics in
2961  // getNodePriority for nodes with no data successors.
2962  if (SU.NumSuccs != 0)
2963  continue;
2964  // For now, only look at nodes with exactly one data predecessor.
2965  if (SU.NumPreds != 1)
2966  continue;
2967  // Avoid prescheduling copies to virtual registers, which don't behave
2968  // like other nodes from the perspective of scheduling heuristics.
2969  if (SDNode *N = SU.getNode())
2970  if (N->getOpcode() == ISD::CopyToReg &&
2971  Register::isVirtualRegister(
2972  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2973  continue;
2974 
2975  SDNode *PredFrameSetup = nullptr;
2976  for (const SDep &Pred : SU.Preds)
2977  if (Pred.isCtrl() && Pred.getSUnit()) {
2978  // Find the predecessor which is not data dependence.
2979  SDNode *PredND = Pred.getSUnit()->getNode();
2980 
2981  // If PredND is FrameSetup, we should not preschedule the node,
2982  // or else, when scheduling bottom up, ADJCALLSTACKDOWN and
2983  // ADJCALLSTACKUP may hold CallResource too long, so that other
2984  // calls can't be scheduled. If there's no other available node
2985  // to schedule, the scheduler will try to rename the register by
2986  // creating a copy to avoid the conflict, which will fail because
2987  // CallResource is not a real physical register.
2988  if (PredND && PredND->isMachineOpcode() &&
2989  (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2990  PredFrameSetup = PredND;
2991  break;
2992  }
2993  }
2994  // Skip the node if it has a FrameSetup parent.
2995  if (PredFrameSetup != nullptr)
2996  continue;
2997 
2998  // Locate the single data predecessor.
2999  SUnit *PredSU = nullptr;
3000  for (const SDep &Pred : SU.Preds)
3001  if (!Pred.isCtrl()) {
3002  PredSU = Pred.getSUnit();
3003  break;
3004  }
3005  assert(PredSU);
3006 
3007  // Don't rewrite edges that carry physregs, because that requires additional
3008  // support infrastructure.
3009  if (PredSU->hasPhysRegDefs)
3010  continue;
3011  // Short-circuit the case where SU is PredSU's only data successor.
3012  if (PredSU->NumSuccs == 1)
3013  continue;
3014  // Avoid prescheduling to copies from virtual registers, which don't behave
3015  // like other nodes from the perspective of scheduling heuristics.
3016  if (SDNode *N = SU.getNode())
3017  if (N->getOpcode() == ISD::CopyFromReg &&
3018  Register::isVirtualRegister(
3019  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
3020  continue;
3021 
3022  // Perform checks on the successors of PredSU.
3023  for (const SDep &PredSucc : PredSU->Succs) {
3024  SUnit *PredSuccSU = PredSucc.getSUnit();
3025  if (PredSuccSU == &SU) continue;
3026  // If PredSU has another successor with no data successors, for
3027  // now don't attempt to choose either over the other.
3028  if (PredSuccSU->NumSuccs == 0)
3029  goto outer_loop_continue;
3030  // Don't break physical register dependencies.
3031  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3032  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3033  goto outer_loop_continue;
3034  // Don't introduce graph cycles.
3035  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3036  goto outer_loop_continue;
3037  }
3038 
3039  // Ok, the transformation is safe and the heuristics suggest it is
3040  // profitable. Update the graph.
3041  LLVM_DEBUG(
3042  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3043  << PredSU->NodeNum
3044  << " to guide scheduling in the presence of multiple uses\n");
3045  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3046  SDep Edge = PredSU->Succs[i];
3047  assert(!Edge.isAssignedRegDep());
3048  SUnit *SuccSU = Edge.getSUnit();
3049  if (SuccSU != &SU) {
3050  Edge.setSUnit(PredSU);
3051  scheduleDAG->RemovePred(SuccSU, Edge);
3052  scheduleDAG->AddPredQueued(&SU, Edge);
3053  Edge.setSUnit(&SU);
3054  scheduleDAG->AddPredQueued(SuccSU, Edge);
3055  --i;
3056  }
3057  }
3058  outer_loop_continue:;
3059  }
3060 }
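// The edge rewrite at the end of the function, in miniature (a sketch on a
// plain adjacency list; reroute is hypothetical): every PredSU -> SuccSU
// edge with SuccSU != SU is replaced by SU -> SuccSU, making SU the sole
// gateway from PredSU to the rest of the graph, so SU schedules right
// next to PredSU.

#include <algorithm>
#include <vector>

static void reroute(std::vector<std::vector<int>> &Succs, int Pred, int SU) {
  for (int &S : Succs[Pred])
    if (S != SU) {
      Succs[SU].push_back(S); // SU inherits the successor edge...
      S = -1;                 // ...and the direct edge is marked dead
    }
  std::vector<int> &V = Succs[Pred];
  V.erase(std::remove(V.begin(), V.end(), -1), V.end());
}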
3061 
3062 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3063 /// it as a def&use operand, add a pseudo control edge from it to the other
3064 /// node (if it won't create a cycle) so the two-address one will be scheduled
3065 /// first (lower in the schedule). If both nodes are two-address, favor the
3066 /// one that has a CopyToReg use (more likely to be a loop induction update).
3067 /// If both are two-address, but one is commutable while the other is not
3068 /// commutable, favor the one that's not commutable.
3069 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3070  for (SUnit &SU : *SUnits) {
3071  if (!SU.isTwoAddress)
3072  continue;
3073 
3074  SDNode *Node = SU.getNode();
3075  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3076  continue;
3077 
3078  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3079  unsigned Opc = Node->getMachineOpcode();
3080  const MCInstrDesc &MCID = TII->get(Opc);
3081  unsigned NumRes = MCID.getNumDefs();
3082  unsigned NumOps = MCID.getNumOperands() - NumRes;
3083  for (unsigned j = 0; j != NumOps; ++j) {
3084  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3085  continue;
3086  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3087  if (DU->getNodeId() == -1)
3088  continue;
3089  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3090  if (!DUSU)
3091  continue;
3092  for (const SDep &Succ : DUSU->Succs) {
3093  if (Succ.isCtrl())
3094  continue;
3095  SUnit *SuccSU = Succ.getSUnit();
3096  if (SuccSU == &SU)
3097  continue;
3098  // Be conservative. Ignore if nodes aren't at roughly the same
3099  // depth and height.
3100  if (SuccSU->getHeight() < SU.getHeight() &&
3101  (SU.getHeight() - SuccSU->getHeight()) > 1)
3102  continue;
3103  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3104  // constrains whatever is using the copy, instead of the copy
3105  // itself. In the case that the copy is coalesced, this
3106  // preserves the intent of the pseudo two-address heuristics.
3107  while (SuccSU->Succs.size() == 1 &&
3108  SuccSU->getNode()->isMachineOpcode() &&
3109  SuccSU->getNode()->getMachineOpcode() ==
3110  TargetOpcode::COPY_TO_REGCLASS)
3111  SuccSU = SuccSU->Succs.front().getSUnit();
3112  // Don't constrain non-instruction nodes.
3113  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3114  continue;
3115  // Don't constrain nodes with physical register defs if the
3116  // predecessor can clobber them.
3117  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3118  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3119  continue;
3120  }
3121  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3122  // these may be coalesced away. We want them close to their uses.
3123  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3124  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3125  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3126  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3127  continue;
3128  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3129  (!canClobber(SuccSU, DUSU) ||
3130  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3131  (!SU.isCommutable && SuccSU->isCommutable)) &&
3132  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3133  LLVM_DEBUG(dbgs()
3134  << " Adding a pseudo-two-addr edge from SU #"
3135  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3136  scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3137  }
3138  }
3139  }
3140  }
3141 }
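// A minimal sketch of the TIED_TO query that drives the loop above, for
// a hypothetical machine opcode Opc and use-operand index UseIdx
// (counted after the NumDefs result operands, as in the loop):
//
//   const MCInstrDesc &Desc = TII->get(Opc);
//   unsigned NumDefs = Desc.getNumDefs();
//   if (Desc.getOperandConstraint(UseIdx + NumDefs, MCOI::TIED_TO) != -1) {
//     // Operand UseIdx is a def&use (two-address) operand: the node
//     // feeding it is the DU whose other users get constrained above.
//   }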
3142 
3143 //===----------------------------------------------------------------------===//
3144 // Public Constructor Functions
3145 //===----------------------------------------------------------------------===//
3146 
3147 ScheduleDAGSDNodes *
3148 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3149  CodeGenOpt::Level OptLevel) {
3150  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3151  const TargetInstrInfo *TII = STI.getInstrInfo();
3152  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3153 
3154  BURegReductionPriorityQueue *PQ =
3155  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3156  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3157  PQ->setScheduleDAG(SD);
3158  return SD;
3159 }
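// Usage sketch: this factory is normally reached through the "list-burr"
// RegisterScheduler entry rather than called directly; assuming a stock
// llc build, the scheduler can be requested with
//
//   llc -pre-RA-sched=list-burr input.ll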
3160 
3161 ScheduleDAGSDNodes *
3162 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3163  CodeGenOpt::Level OptLevel) {
3164  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3165  const TargetInstrInfo *TII = STI.getInstrInfo();
3166  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3167 
3168  SrcRegReductionPriorityQueue *PQ =
3169  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3170  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3171  PQ->setScheduleDAG(SD);
3172  return SD;
3173 }
3174 
3175 ScheduleDAGSDNodes *
3176 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3177  CodeGenOpt::Level OptLevel) {
3178  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3179  const TargetInstrInfo *TII = STI.getInstrInfo();
3180  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3181  const TargetLowering *TLI = IS->TLI;
3182 
3183  HybridBURRPriorityQueue *PQ =
3184  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3185 
3186  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3187  PQ->setScheduleDAG(SD);
3188  return SD;
3189 }
3190 
3191 ScheduleDAGSDNodes *
3192 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3193  CodeGenOpt::Level OptLevel) {
3194  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3195  const TargetInstrInfo *TII = STI.getInstrInfo();
3196  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3197  const TargetLowering *TLI = IS->TLI;
3198 
3199  ILPBURRPriorityQueue *PQ =
3200  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3201  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3202  PQ->setScheduleDAG(SD);
3203  return SD;
3204 }
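// A direct-use sketch (IS being a hypothetical SelectionDAGISel *): each
// factory above pairs a priority queue with a ScheduleDAGRRList and wires
// the queue back to its scheduler before returning:
//
//   ScheduleDAGSDNodes *Sched =
//       createILPListDAGScheduler(IS, CodeGenOpt::Default);
//   // Sched's ILPBURRPriorityQueue now points back at Sched via
//   // setScheduleDAG(), mirroring the factory body above.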