MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/PriorityQueue.h"
22 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/Support/Debug.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "misched"
37 
38 namespace llvm {
39 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
40  cl::desc("Force top-down list scheduling"));
41 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
42  cl::desc("Force bottom-up list scheduling"));
43 cl::opt<bool>
44 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
45  cl::desc("Print critical path length to stdout"));
46 }
47 
48 #ifndef NDEBUG
49 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
50  cl::desc("Pop up a window to show MISched dags after they are processed"));
51 
52 /// In some situations a few uninteresting nodes depend on nearly all other
53 /// nodes in the graph; provide a cutoff to hide them.
54 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
55  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
56 
57 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
58  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
59 
60 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
61  cl::desc("Only schedule this function"));
62 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
63  cl::desc("Only schedule this MBB#"));
64 #else
65 static bool ViewMISchedDAGs = false;
66 #endif // NDEBUG
67 
68 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
69 /// size of the ready lists.
70 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
71  cl::desc("Limit ready list to N instructions"), cl::init(256));
72 
73 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
74  cl::desc("Enable register pressure scheduling."), cl::init(true));
75 
76 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
77  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
78 
79 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
80  cl::desc("Enable memop clustering."),
81  cl::init(true));
82 
83 // Experimental heuristics
84 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
85  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
86 
87 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
88  cl::desc("Verify machine instrs before and after machine scheduling"));
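// Example of exercising the options above from llc (hypothetical invocation;
// the NDEBUG-guarded flags require an asserts build):
//   llc -misched-cutoff=20 -verify-misched -view-misched-dags foo.ll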
89 
90 // DAG subtrees must have at least this many nodes.
91 static const unsigned MinSubtreeSize = 8;
92 
93 // Pin the vtables to this file.
94 void MachineSchedStrategy::anchor() {}
95 void ScheduleDAGMutation::anchor() {}
96 
97 //===----------------------------------------------------------------------===//
98 // Machine Instruction Scheduling Pass and Registry
99 //===----------------------------------------------------------------------===//
100 
101 MachineSchedContext::MachineSchedContext():
102  MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
103  RegClassInfo = new RegisterClassInfo();
104 }
105 
106 MachineSchedContext::~MachineSchedContext() {
107  delete RegClassInfo;
108 }
109 
110 namespace {
111 /// Base class for a machine scheduler class that can run at any point.
112 class MachineSchedulerBase : public MachineSchedContext,
113  public MachineFunctionPass {
114 public:
115  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
116 
117  void print(raw_ostream &O, const Module* = nullptr) const override;
118 
119 protected:
120  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
121 };
122 
123 /// MachineScheduler runs after coalescing and before register allocation.
124 class MachineScheduler : public MachineSchedulerBase {
125 public:
126  MachineScheduler();
127 
128  void getAnalysisUsage(AnalysisUsage &AU) const override;
129 
130  bool runOnMachineFunction(MachineFunction&) override;
131 
132  static char ID; // Class identification, replacement for typeinfo
133 
134 protected:
135  ScheduleDAGInstrs *createMachineScheduler();
136 };
137 
138 /// PostMachineScheduler runs shortly before code emission.
139 class PostMachineScheduler : public MachineSchedulerBase {
140 public:
141  PostMachineScheduler();
142 
143  void getAnalysisUsage(AnalysisUsage &AU) const override;
144 
145  bool runOnMachineFunction(MachineFunction&) override;
146 
147  static char ID; // Class identification, replacement for typeinfo
148 
149 protected:
150  ScheduleDAGInstrs *createPostMachineScheduler();
151 };
152 } // namespace
153 
154 char MachineScheduler::ID = 0;
155 
156 char &llvm::MachineSchedulerID = MachineScheduler::ID;
157 
158 INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
159  "Machine Instruction Scheduler", false, false)
163 INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
164  "Machine Instruction Scheduler", false, false)
165 
166 MachineScheduler::MachineScheduler()
167 : MachineSchedulerBase(ID) {
168  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
169 }
170 
171 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
172  AU.setPreservesCFG();
177  AU.addRequired<SlotIndexes>();
182 }
183 
184 char PostMachineScheduler::ID = 0;
185 
186 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
187 
188 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
189  "PostRA Machine Instruction Scheduler", false, false)
190 
191 PostMachineScheduler::PostMachineScheduler()
192 : MachineSchedulerBase(ID) {
193  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
194 }
195 
196 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
197  AU.setPreservesCFG();
202 }
203 
204 MachinePassRegistry MachineSchedRegistry::Registry;
205 
206 /// A dummy default scheduler factory indicates whether the scheduler
207 /// is overridden on the command line.
208 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
209  return nullptr;
210 }
211 
212 /// MachineSchedOpt allows command line selection of the scheduler.
213 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
214  RegisterPassParser<MachineSchedRegistry> >
215 MachineSchedOpt("misched",
216  cl::init(&useDefaultMachineSched), cl::Hidden,
217  cl::desc("Machine instruction scheduler to use"));
218 
219 static MachineSchedRegistry
220 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
221  useDefaultMachineSched);
222 
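// A scheduling strategy becomes selectable via -misched=<name> by registering a
// factory with MachineSchedRegistry. A minimal sketch (the name "my-custom" and
// createMyCustomSched are hypothetical; GenericScheduler is the stock strategy):
//   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MyRegistry("my-custom", "My custom scheduling strategy.", createMyCustomSched);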
224  "enable-misched",
225  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
226  cl::Hidden);
227 
229  "enable-post-misched",
230  cl::desc("Enable the post-ra machine instruction scheduling pass."),
231  cl::init(true), cl::Hidden);
232 
233 /// Decrement this iterator until reaching the top or a non-debug instr.
237  assert(I != Beg && "reached the top of the region, cannot decrement");
238  while (--I != Beg) {
239  if (!I->isDebugValue())
240  break;
241  }
242  return I;
243 }
244 
245 /// Non-const version.
251 }
252 
253 /// If this iterator is a debug value, increment until reaching the End or a
254 /// non-debug instruction.
258  for(; I != End; ++I) {
259  if (!I->isDebugValue())
260  break;
261  }
262  return I;
263 }
264 
265 /// Non-const version.
271 }
272 
273 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
274 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
275  // Select the scheduler, or set the default.
277  if (Ctor != useDefaultMachineSched)
278  return Ctor(this);
279 
280  // Get the default scheduler set by the target for this function.
281  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
282  if (Scheduler)
283  return Scheduler;
284 
285  // Default to GenericScheduler.
286  return createGenericSchedLive(this);
287 }
288 
289 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
290 /// the caller. We don't have a command line option to override the postRA
291 /// scheduler. The Target must configure it.
292 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
293  // Get the postRA scheduler set by the target for this function.
294  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
295  if (Scheduler)
296  return Scheduler;
297 
298  // Default to GenericScheduler.
299  return createGenericSchedPostRA(this);
300 }
301 
302 /// Top-level MachineScheduler pass driver.
303 ///
304 /// Visit blocks in function order. Divide each block into scheduling regions
305 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
306 /// consistent with the DAG builder, which traverses the interior of the
307 /// scheduling regions bottom-up.
308 ///
309 /// This design avoids exposing scheduling boundaries to the DAG builder,
310 /// simplifying the DAG builder's support for "special" target instructions.
311 /// At the same time the design allows target schedulers to operate across
312 /// scheduling boundaries, for example to bundle the boundary instructions
313 /// without reordering them. This creates complexity, because the target
314 /// scheduler must update the RegionBegin and RegionEnd positions cached by
315 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
316 /// design would be to split blocks at scheduling boundaries, but LLVM has a
317 /// general bias against block splitting purely for implementation simplicity.
318 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
319  if (skipFunction(*mf.getFunction()))
320  return false;
321 
322  if (EnableMachineSched.getNumOccurrences()) {
323  if (!EnableMachineSched)
324  return false;
325  } else if (!mf.getSubtarget().enableMachineScheduler())
326  return false;
327 
328  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
329 
330  // Initialize the context of the pass.
331  MF = &mf;
332  MLI = &getAnalysis<MachineLoopInfo>();
333  MDT = &getAnalysis<MachineDominatorTree>();
334  PassConfig = &getAnalysis<TargetPassConfig>();
335  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
336 
337  LIS = &getAnalysis<LiveIntervals>();
338 
339  if (VerifyScheduling) {
340  DEBUG(LIS->dump());
341  MF->verify(this, "Before machine scheduling.");
342  }
343  RegClassInfo->runOnMachineFunction(*MF);
344 
345  // Instantiate the selected scheduler for this target, function, and
346  // optimization level.
347  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
348  scheduleRegions(*Scheduler, false);
349 
350  DEBUG(LIS->dump());
351  if (VerifyScheduling)
352  MF->verify(this, "After machine scheduling.");
353  return true;
354 }
355 
356 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
357  if (skipFunction(*mf.getFunction()))
358  return false;
359 
360  if (EnablePostRAMachineSched.getNumOccurrences()) {
362  return false;
363  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
364  DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
365  return false;
366  }
367  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
368 
369  // Initialize the context of the pass.
370  MF = &mf;
371  PassConfig = &getAnalysis<TargetPassConfig>();
372 
373  if (VerifyScheduling)
374  MF->verify(this, "Before post machine scheduling.");
375 
376  // Instantiate the selected scheduler for this target, function, and
377  // optimization level.
378  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
379  scheduleRegions(*Scheduler, true);
380 
381  if (VerifyScheduling)
382  MF->verify(this, "After post machine scheduling.");
383  return true;
384 }
385 
386 /// Return true if the given instruction should not be included in a scheduling
387 /// region.
388 ///
389 /// MachineScheduler does not currently support scheduling across calls. To
390 /// handle calls, the DAG builder needs to be modified to create register
391 /// anti/output dependencies on the registers clobbered by the call's regmask
392 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
393 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
394 /// the boundary, but there would be no benefit to postRA scheduling across
395 /// calls this late anyway.
398  MachineFunction *MF,
399  const TargetInstrInfo *TII) {
400  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
401 }
402 
403 /// Main driver for both MachineScheduler and PostMachineScheduler.
404 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
405  bool FixKillFlags) {
406  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
407 
408  // Visit all machine basic blocks.
409  //
410  // TODO: Visit blocks in global postorder or postorder within the bottom-up
411  // loop tree. Then we can optionally compute global RegPressure.
412  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
413  MBB != MBBEnd; ++MBB) {
414 
415  Scheduler.startBlock(&*MBB);
416 
417 #ifndef NDEBUG
418  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
419  continue;
420  if (SchedOnlyBlock.getNumOccurrences()
421  && (int)SchedOnlyBlock != MBB->getNumber())
422  continue;
423 #endif
424 
425  // Break the block into scheduling regions [I, RegionEnd), and schedule each
426  // region as soon as it is discovered. RegionEnd points to the scheduling
427  // boundary at the bottom of the region. The DAG does not include RegionEnd,
428  // but the region does (i.e. the next RegionEnd is above the previous
429  // RegionBegin). If the current block has no terminator then RegionEnd ==
430  // MBB->end() for the bottom region.
431  //
432  // The Scheduler may insert instructions during either schedule() or
433  // exitRegion(), even for empty regions. So the local iterators 'I' and
434  // 'RegionEnd' are invalid across these calls.
435  //
436  // MBB::size() uses instr_iterator to count. Here we need a bundle to count
437  // as a single instruction.
438  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
439  RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
440 
441  // Avoid decrementing RegionEnd for blocks with no terminator.
442  if (RegionEnd != MBB->end() ||
443  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
444  --RegionEnd;
445  }
446 
447  // The next region starts above the previous region. Look backward in the
448  // instruction stream until we find the nearest boundary.
449  unsigned NumRegionInstrs = 0;
450  MachineBasicBlock::iterator I = RegionEnd;
451  for (;I != MBB->begin(); --I) {
452  MachineInstr &MI = *std::prev(I);
453  if (isSchedBoundary(&MI, &*MBB, MF, TII))
454  break;
455  if (!MI.isDebugValue())
456  ++NumRegionInstrs;
457  }
458  // Notify the scheduler of the region, even if we may skip scheduling
459  // it. Perhaps it still needs to be bundled.
460  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
461 
462  // Skip empty scheduling regions (0 or 1 schedulable instructions).
463  if (I == RegionEnd || I == std::prev(RegionEnd)) {
464  // Close the current region. Bundle the terminator if needed.
465  // This invalidates 'RegionEnd' and 'I'.
466  Scheduler.exitRegion();
467  continue;
468  }
469  DEBUG(dbgs() << "********** MI Scheduling **********\n");
470  DEBUG(dbgs() << MF->getName()
471  << ":BB#" << MBB->getNumber() << " " << MBB->getName()
472  << "\n From: " << *I << " To: ";
473  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
474  else dbgs() << "End";
475  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
476  if (DumpCriticalPathLength) {
477  errs() << MF->getName();
478  errs() << ":BB# " << MBB->getNumber();
479  errs() << " " << MBB->getName() << " \n";
480  }
481 
482  // Schedule a region: possibly reorder instructions.
483  // This invalidates 'RegionEnd' and 'I'.
484  Scheduler.schedule();
485 
486  // Close the current region.
487  Scheduler.exitRegion();
488 
489  // Scheduling has invalidated the current iterator 'I'. Ask the
490  // scheduler for the top of its scheduled region.
491  RegionEnd = Scheduler.begin();
492  }
493  Scheduler.finishBlock();
494  // FIXME: Ideally, no further passes should rely on kill flags. However,
495  // thumb2 size reduction is currently an exception, so the PostMIScheduler
496  // needs to do this.
497  if (FixKillFlags)
498  Scheduler.fixupKills(&*MBB);
499  }
500  Scheduler.finalizeSchedule();
501 }
502 
503 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
504  // unimplemented
505 }
506 
509  dbgs() << "Queue " << Name << ": ";
510  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
511  dbgs() << Queue[i]->NodeNum << " ";
512  dbgs() << "\n";
513 }
514 
515 //===----------------------------------------------------------------------===//
516 // ScheduleDAGMI - Basic machine instruction scheduling. This is
517 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
518 // virtual registers.
519 //===----------------------------------------------------------------------===//
520 
521 // Provide a vtable anchor.
522 ScheduleDAGMI::~ScheduleDAGMI() {
523 }
524 
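/// True if adding an edge from PredSU to SuccSU cannot create a cycle: either
/// SuccSU is the exit node, or PredSU is not already reachable from SuccSU in
/// the topological ordering.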
525 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
526  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
527 }
528 
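/// Add PredDep as a predecessor of SuccSU and update the topological ordering.
/// Returns false only if the edge would close a cycle; otherwise returns true,
/// whether or not a new edge actually had to be inserted.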
529 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
530  if (SuccSU != &ExitSU) {
531  // Do not use WillCreateCycle, it assumes SD scheduling.
532  // If Pred is reachable from Succ, then the edge creates a cycle.
533  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
534  return false;
535  Topo.AddPred(SuccSU, PredDep.getSUnit());
536  }
537  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
538  // Return true regardless of whether a new edge needed to be inserted.
539  return true;
540 }
541 
542 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
543 /// NumPredsLeft reaches zero, release the successor node.
544 ///
545 /// FIXME: Adjust SuccSU height based on MinLatency.
546 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
547  SUnit *SuccSU = SuccEdge->getSUnit();
548 
549  if (SuccEdge->isWeak()) {
550  --SuccSU->WeakPredsLeft;
551  if (SuccEdge->isCluster())
552  NextClusterSucc = SuccSU;
553  return;
554  }
555 #ifndef NDEBUG
556  if (SuccSU->NumPredsLeft == 0) {
557  dbgs() << "*** Scheduling failed! ***\n";
558  SuccSU->dump(this);
559  dbgs() << " has been released too many times!\n";
560  llvm_unreachable(nullptr);
561  }
562 #endif
563  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
564  // CurrCycle may have advanced since then.
565  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
566  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
567 
568  --SuccSU->NumPredsLeft;
569  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
570  SchedImpl->releaseTopNode(SuccSU);
571 }
572 
573 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
575  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
576  I != E; ++I) {
577  releaseSucc(SU, &*I);
578  }
579 }
580 
581 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
582 /// NumSuccsLeft reaches zero, release the predecessor node.
583 ///
584 /// FIXME: Adjust PredSU height based on MinLatency.
585 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
586  SUnit *PredSU = PredEdge->getSUnit();
587 
588  if (PredEdge->isWeak()) {
589  --PredSU->WeakSuccsLeft;
590  if (PredEdge->isCluster())
591  NextClusterPred = PredSU;
592  return;
593  }
594 #ifndef NDEBUG
595  if (PredSU->NumSuccsLeft == 0) {
596  dbgs() << "*** Scheduling failed! ***\n";
597  PredSU->dump(this);
598  dbgs() << " has been released too many times!\n";
599  llvm_unreachable(nullptr);
600  }
601 #endif
602  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
603  // CurrCycle may have advanced since then.
604  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
605  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
606 
607  --PredSU->NumSuccsLeft;
608  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
609  SchedImpl->releaseBottomNode(PredSU);
610 }
611 
612 /// releasePredecessors - Call releasePred on each of SU's predecessors.
614  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
615  I != E; ++I) {
616  releasePred(SU, &*I);
617  }
618 }
619 
620 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
621 /// crossing a scheduling boundary. [begin, end) includes all instructions in
622 /// the region, including the boundary itself and single-instruction regions
623 /// that don't get scheduled.
627  unsigned regioninstrs)
628 {
629  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
630 
631  SchedImpl->initPolicy(begin, end, regioninstrs);
632 }
633 
634 /// This is normally called from the main scheduler loop but may also be invoked
635 /// by the scheduling strategy to perform additional code motion.
636 void ScheduleDAGMI::moveInstruction(
637  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
638  // Advance RegionBegin if the first instruction moves down.
639  if (&*RegionBegin == MI)
640  ++RegionBegin;
641 
642  // Update the instruction stream.
643  BB->splice(InsertPos, BB, MI);
644 
645  // Update LiveIntervals
646  if (LIS)
647  LIS->handleMove(*MI, /*UpdateFlags=*/true);
648 
649  // Recede RegionBegin if an instruction moves above the first.
650  if (RegionBegin == InsertPos)
651  RegionBegin = MI;
652 }
653 
654 bool ScheduleDAGMI::checkSchedLimit() {
655 #ifndef NDEBUG
656  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
657  CurrentTop = CurrentBottom;
658  return false;
659  }
660  ++NumInstrsScheduled;
661 #endif
662  return true;
663 }
664 
665 /// Per-region scheduling driver, called back from
666 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
667 /// does not consider liveness or register pressure. It is useful for PostRA
668 /// scheduling and potentially other custom schedulers.
669 void ScheduleDAGMI::schedule() {
670  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
671  DEBUG(SchedImpl->dumpPolicy());
672 
673  // Build the DAG.
674  buildSchedGraph(AA);
675 
676  Topo.InitDAGTopologicalSorting();
677 
678  postprocessDAG();
679 
680  SmallVector<SUnit*, 8> TopRoots, BotRoots;
681  findRootsAndBiasEdges(TopRoots, BotRoots);
682 
683  // Initialize the strategy before modifying the DAG.
684  // This may initialize a DFSResult to be used for queue priority.
685  SchedImpl->initialize(this);
686 
687  DEBUG(
688  if (EntrySU.getInstr() != nullptr)
689  EntrySU.dumpAll(this);
690  for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
691  SUnits[su].dumpAll(this);
692  if (ExitSU.getInstr() != nullptr)
693  ExitSU.dumpAll(this);
694  );
695  if (ViewMISchedDAGs) viewGraph();
696 
697  // Initialize ready queues now that the DAG and priority data are finalized.
698  initQueues(TopRoots, BotRoots);
699 
700  bool IsTopNode = false;
701  while (true) {
702  DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
703  SUnit *SU = SchedImpl->pickNode(IsTopNode);
704  if (!SU) break;
705 
706  assert(!SU->isScheduled && "Node already scheduled");
707  if (!checkSchedLimit())
708  break;
709 
710  MachineInstr *MI = SU->getInstr();
711  if (IsTopNode) {
712  assert(SU->isTopReady() && "node still has unscheduled dependencies");
713  if (&*CurrentTop == MI)
714  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
715  else
716  moveInstruction(MI, CurrentTop);
717  } else {
718  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
719  MachineBasicBlock::iterator priorII =
720  priorNonDebug(CurrentBottom, CurrentTop);
721  if (&*priorII == MI)
722  CurrentBottom = priorII;
723  else {
724  if (&*CurrentTop == MI)
725  CurrentTop = nextIfDebug(++CurrentTop, priorII);
726  moveInstruction(MI, CurrentBottom);
727  CurrentBottom = MI;
728  }
729  }
730  // Notify the scheduling strategy before updating the DAG.
731  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
732  // runs, it can then use the accurate ReadyCycle time to determine whether
733  // newly released nodes can move to the readyQ.
734  SchedImpl->schedNode(SU, IsTopNode);
735 
736  updateQueues(SU, IsTopNode);
737  }
738  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
739 
740  placeDebugValues();
741 
742  DEBUG({
743  unsigned BBNum = begin()->getParent()->getNumber();
744  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
745  dumpSchedule();
746  dbgs() << '\n';
747  });
748 }
749 
750 /// Apply each ScheduleDAGMutation step in order.
752  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
753  Mutations[i]->apply(this);
754  }
755 }
756 
757 void ScheduleDAGMI::
758 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
759  SmallVectorImpl<SUnit*> &BotRoots) {
760  for (std::vector<SUnit>::iterator
761  I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
762  SUnit *SU = &(*I);
763  assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
764 
765  // Order predecessors so DFSResult follows the critical path.
766  SU->biasCriticalPath();
767 
768  // A SUnit is ready to top schedule if it has no predecessors.
769  if (!I->NumPredsLeft)
770  TopRoots.push_back(SU);
771  // A SUnit is ready to bottom schedule if it has no successors.
772  if (!I->NumSuccsLeft)
773  BotRoots.push_back(SU);
774  }
776 }
777 
778 /// Identify DAG roots and setup scheduler queues.
779 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
780  ArrayRef<SUnit*> BotRoots) {
781  NextClusterSucc = nullptr;
782  NextClusterPred = nullptr;
783 
784  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
785  //
786  // Nodes with unreleased weak edges can still be roots.
787  // Release top roots in forward order.
789  I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
790  SchedImpl->releaseTopNode(*I);
791  }
792  // Release bottom roots in reverse order so the higher priority nodes appear
793  // first. This is more natural and slightly more efficient.
795  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
796  SchedImpl->releaseBottomNode(*I);
797  }
798 
801 
802  SchedImpl->registerRoots();
803 
804  // Advance past initial DebugValues.
807 }
808 
809 /// Update scheduler queues after scheduling an instruction.
810 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
811  // Release dependent instructions for scheduling.
812  if (IsTopNode)
813  releaseSuccessors(SU);
814  else
815  releasePredecessors(SU);
816 
817  SU->isScheduled = true;
818 }
819 
820 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
822  // If first instruction was a DBG_VALUE then put it back.
823  if (FirstDbgValue) {
824  BB->splice(RegionBegin, BB, FirstDbgValue);
825  RegionBegin = FirstDbgValue;
826  }
827 
828  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
829  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
830  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
831  MachineInstr *DbgValue = P.first;
832  MachineBasicBlock::iterator OrigPrevMI = P.second;
833  if (&*RegionBegin == DbgValue)
834  ++RegionBegin;
835  BB->splice(++OrigPrevMI, BB, DbgValue);
836  if (OrigPrevMI == std::prev(RegionEnd))
837  RegionEnd = DbgValue;
838  }
839  DbgValues.clear();
840  FirstDbgValue = nullptr;
841 }
842 
843 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
845  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
846  if (SUnit *SU = getSUnit(&(*MI)))
847  SU->dump(this);
848  else
849  dbgs() << "Missing SUnit\n";
850  }
851 }
852 #endif
853 
854 //===----------------------------------------------------------------------===//
855 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
856 // preservation.
857 //===----------------------------------------------------------------------===//
858 
859 ScheduleDAGMILive::~ScheduleDAGMILive() {
860  delete DFSResult;
861 }
862 
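/// Record this SU in VRegUses for each virtual register it reads (ignoring pure
/// re-defs when tracking lane masks), so later pressure updates can visit every
/// unscheduled user of a given vreg.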
863 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
864  const MachineInstr &MI = *SU.getInstr();
865  for (const MachineOperand &MO : MI.operands()) {
866  if (!MO.isReg())
867  continue;
868  if (!MO.readsReg())
869  continue;
870  if (TrackLaneMasks && !MO.isUse())
871  continue;
872 
873  unsigned Reg = MO.getReg();
875  continue;
876 
877  // Ignore re-defs.
878  if (TrackLaneMasks) {
879  bool FoundDef = false;
880  for (const MachineOperand &MO2 : MI.operands()) {
881  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
882  FoundDef = true;
883  break;
884  }
885  }
886  if (FoundDef)
887  continue;
888  }
889 
890  // Record this local VReg use.
892  for (; UI != VRegUses.end(); ++UI) {
893  if (UI->SU == &SU)
894  break;
895  }
896  if (UI == VRegUses.end())
898  }
899 }
900 
901 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
902 /// crossing a scheduling boundary. [begin, end) includes all instructions in
903 /// the region, including the boundary itself and single-instruction regions
904 /// that don't get scheduled.
908  unsigned regioninstrs)
909 {
910  // ScheduleDAGMI initializes SchedImpl's per-region policy.
911  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
912 
913  // For convenience remember the end of the liveness region.
914  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
915 
917 
918  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
919  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
920 
922  "ShouldTrackLaneMasks requires ShouldTrackPressure");
923 }
924 
925 // Set up the register pressure trackers for the top-scheduled and
926 // bottom-scheduled regions.
928  VRegUses.clear();
930  for (SUnit &SU : SUnits)
931  collectVRegUses(SU);
932 
934  ShouldTrackLaneMasks, false);
936  ShouldTrackLaneMasks, false);
937 
938  // Close the RPTracker to finalize live ins.
940 
941  DEBUG(RPTracker.dump());
942 
943  // Initialize the live ins and live outs.
946 
947  // Close one end of the tracker so we can call
948  // getMaxUpward/DownwardPressureDelta before advancing across any
949  // instructions. This converts currently live regs into live ins/outs.
952 
954  if (!BotRPTracker.getLiveThru().empty()) {
956  DEBUG(dbgs() << "Live Thru: ";
958  };
959 
960  // For each live out vreg reduce the pressure change associated with other
961  // uses of the same vreg below the live-out reaching def.
963 
964  // Account for liveness generated by the region boundary.
965  if (LiveRegionEnd != RegionEnd) {
967  BotRPTracker.recede(&LiveUses);
968  updatePressureDiffs(LiveUses);
969  }
970 
971  DEBUG(
972  dbgs() << "Top Pressure:\n";
974  dbgs() << "Bottom Pressure:\n";
976  );
977 
978  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
979 
980  // Cache the list of excess pressure sets in this region. This will also track
981  // the max pressure in the scheduled code for these sets.
982  RegionCriticalPSets.clear();
983  const std::vector<unsigned> &RegionPressure =
985  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
986  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
987  if (RegionPressure[i] > Limit) {
989  << " Limit " << Limit
990  << " Actual " << RegionPressure[i] << "\n");
992  }
993  }
994  DEBUG(dbgs() << "Excess PSets: ";
995  for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
997  RegionCriticalPSets[i].getPSet()) << " ";
998  dbgs() << "\n");
999 }
1000 
1003  const std::vector<unsigned> &NewMaxPressure) {
1004  const PressureDiff &PDiff = getPressureDiff(SU);
1005  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1006  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
1007  I != E; ++I) {
1008  if (!I->isValid())
1009  break;
1010  unsigned ID = I->getPSet();
1011  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1012  ++CritIdx;
1013  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1014  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1015  && NewMaxPressure[ID] <= INT16_MAX)
1016  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1017  }
1018  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1019  if (NewMaxPressure[ID] >= Limit - 2) {
1020  DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1021  << NewMaxPressure[ID]
1022  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1023  << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1024  }
1025  }
1026 }
1027 
1028 /// Update the PressureDiff array for liveness after scheduling this
1029 /// instruction.
1031  ArrayRef<RegisterMaskPair> LiveUses) {
1032  for (const RegisterMaskPair &P : LiveUses) {
1033  unsigned Reg = P.RegUnit;
1034  /// FIXME: Currently assuming single-use physregs.
1035  if (!TRI->isVirtualRegister(Reg))
1036  continue;
1037 
1038  if (ShouldTrackLaneMasks) {
1039  // If the register has just become live then other uses won't change
1040  // this fact anymore => decrement pressure.
1041  // If the register has just become dead then other uses make it come
1042  // back to life => increment pressure.
1043  bool Decrement = P.LaneMask.any();
1044 
1045  for (const VReg2SUnit &V2SU
1046  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1047  SUnit &SU = *V2SU.SU;
1048  if (SU.isScheduled || &SU == &ExitSU)
1049  continue;
1050 
1051  PressureDiff &PDiff = getPressureDiff(&SU);
1052  PDiff.addPressureChange(Reg, Decrement, &MRI);
1053  DEBUG(
1054  dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1055  << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1056  << ' ' << *SU.getInstr();
1057  dbgs() << " to ";
1058  PDiff.dump(*TRI);
1059  );
1060  }
1061  } else {
1062  assert(P.LaneMask.any());
1063  DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
1064  // This may be called before CurrentBottom has been initialized. However,
1065  // BotRPTracker must have a valid position. We want the value live into the
1066  // instruction or live out of the block, so ask for the previous
1067  // instruction's live-out.
1068  const LiveInterval &LI = LIS->getInterval(Reg);
1069  VNInfo *VNI;
1072  if (I == BB->end())
1073  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1074  else {
1076  VNI = LRQ.valueIn();
1077  }
1078  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1079  assert(VNI && "No live value at use.");
1080  for (const VReg2SUnit &V2SU
1081  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1082  SUnit *SU = V2SU.SU;
1083  // If this use comes before the reaching def, it cannot be a last use,
1084  // so decrease its pressure change.
1085  if (!SU->isScheduled && SU != &ExitSU) {
1086  LiveQueryResult LRQ =
1087  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1088  if (LRQ.valueIn() == VNI) {
1089  PressureDiff &PDiff = getPressureDiff(SU);
1090  PDiff.addPressureChange(Reg, true, &MRI);
1091  DEBUG(
1092  dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1093  << *SU->getInstr();
1094  dbgs() << " to ";
1095  PDiff.dump(*TRI);
1096  );
1097  }
1098  }
1099  }
1100  }
1101  }
1102 }
1103 
1104 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1105 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1106 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1107 ///
1108 /// This is a skeletal driver, with all the functionality pushed into helpers,
1109 /// so that it can be easily extended by experimental schedulers. Generally,
1110 /// implementing MachineSchedStrategy should be sufficient to implement a new
1111 /// scheduling algorithm. However, if a scheduler further subclasses
1112 /// ScheduleDAGMILive then it will want to override this virtual method in order
1113 /// to update any specialized state.
1114 void ScheduleDAGMILive::schedule() {
1115  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1116  DEBUG(SchedImpl->dumpPolicy());
1117  buildDAGWithRegPressure();
1118 
1119  Topo.InitDAGTopologicalSorting();
1120 
1121  postprocessDAG();
1122 
1123  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1124  findRootsAndBiasEdges(TopRoots, BotRoots);
1125 
1126  // Initialize the strategy before modifying the DAG.
1127  // This may initialize a DFSResult to be used for queue priority.
1128  SchedImpl->initialize(this);
1129 
1130  DEBUG(
1131  if (EntrySU.getInstr() != nullptr)
1132  EntrySU.dumpAll(this);
1133  for (const SUnit &SU : SUnits) {
1134  SU.dumpAll(this);
1135  if (ShouldTrackPressure) {
1136  dbgs() << " Pressure Diff : ";
1137  getPressureDiff(&SU).dump(*TRI);
1138  }
1139  dbgs() << '\n';
1140  }
1141  if (ExitSU.getInstr() != nullptr)
1142  ExitSU.dumpAll(this);
1143  );
1144  if (ViewMISchedDAGs) viewGraph();
1145 
1146  // Initialize ready queues now that the DAG and priority data are finalized.
1147  initQueues(TopRoots, BotRoots);
1148 
1149  bool IsTopNode = false;
1150  while (true) {
1151  DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1152  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1153  if (!SU) break;
1154 
1155  assert(!SU->isScheduled && "Node already scheduled");
1156  if (!checkSchedLimit())
1157  break;
1158 
1159  scheduleMI(SU, IsTopNode);
1160 
1161  if (DFSResult) {
1162  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1163  if (!ScheduledTrees.test(SubtreeID)) {
1164  ScheduledTrees.set(SubtreeID);
1165  DFSResult->scheduleTree(SubtreeID);
1166  SchedImpl->scheduleTree(SubtreeID);
1167  }
1168  }
1169 
1170  // Notify the scheduling strategy after updating the DAG.
1171  SchedImpl->schedNode(SU, IsTopNode);
1172 
1173  updateQueues(SU, IsTopNode);
1174  }
1175  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1176 
1177  placeDebugValues();
1178 
1179  DEBUG({
1180  unsigned BBNum = begin()->getParent()->getNumber();
1181  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1182  dumpSchedule();
1183  dbgs() << '\n';
1184  });
1185 }
1186 
1187 /// Build the DAG and setup three register pressure trackers.
1188 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1189  if (!ShouldTrackPressure) {
1190  RPTracker.reset();
1191  RegionCriticalPSets.clear();
1192  buildSchedGraph(AA);
1193  return;
1194  }
1195 
1196  // Initialize the register pressure tracker used by buildSchedGraph.
1198  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1199 
1200  // Account for liveness generated by the region boundary.
1201  if (LiveRegionEnd != RegionEnd)
1202  RPTracker.recede();
1203 
1204  // Build the DAG, and compute current register pressure.
1206 
1207  // Initialize top/bottom trackers after computing region pressure.
1208  initRegPressure();
1209 }
1210 
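/// Compute (or recompute) the bottom-up DFS result over the current SUnits;
/// the resulting subtree IDs feed subtree-based queue priorities in the
/// scheduling strategy.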
1211 void ScheduleDAGMILive::computeDFSResult() {
1212  if (!DFSResult)
1213  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1214  DFSResult->clear();
1216  DFSResult->resize(SUnits.size());
1219 }
1220 
1221 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1222 /// only provides the critical path for single block loops. To handle loops that
1223 /// span blocks, we could use the vreg path latencies provided by
1224 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1225 /// available for use in the scheduler.
1226 ///
1227 /// The cyclic path estimation identifies a def-use pair that crosses the back
1228 /// edge and considers the depth and height of the nodes. For example, consider
1229 /// the following instruction sequence where each instruction has unit latency
1230 /// and defines an eponymous virtual register:
1231 ///
1232 /// a->b(a,c)->c(b)->d(c)->exit
1233 ///
1234 /// The cyclic critical path is two cycles: b->c->b
1235 /// The acyclic critical path is four cycles: a->b->c->d->exit
1236 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1237 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1238 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1239 /// LiveInDepth = depth(b) = len(a->b) = 1
1240 ///
1241 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1242 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1243 /// CyclicCriticalPath = min(2, 2) = 2
1244 ///
1245 /// This could be relevant to PostRA scheduling, but is currently implemented
1246 /// assuming LiveIntervals.
1248  // This only applies to single-block loops.
1249  if (!BB->isSuccessor(BB))
1250  return 0;
1251 
1252  unsigned MaxCyclicLatency = 0;
1253  // Visit each live out vreg def to find def/use pairs that cross iterations.
1255  unsigned Reg = P.RegUnit;
1256  if (!TRI->isVirtualRegister(Reg))
1257  continue;
1258  const LiveInterval &LI = LIS->getInterval(Reg);
1259  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1260  if (!DefVNI)
1261  continue;
1262 
1263  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1264  const SUnit *DefSU = getSUnit(DefMI);
1265  if (!DefSU)
1266  continue;
1267 
1268  unsigned LiveOutHeight = DefSU->getHeight();
1269  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1270  // Visit all local users of the vreg def.
1271  for (const VReg2SUnit &V2SU
1272  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1273  SUnit *SU = V2SU.SU;
1274  if (SU == &ExitSU)
1275  continue;
1276 
1277  // Only consider uses of the phi.
1279  if (!LRQ.valueIn()->isPHIDef())
1280  continue;
1281 
1282  // Assume that a path spanning two iterations is a cycle, which could
1283  // overestimate in strange cases. This allows cyclic latency to be
1284  // estimated as the minimum slack of the vreg's depth or height.
1285  unsigned CyclicLatency = 0;
1286  if (LiveOutDepth > SU->getDepth())
1287  CyclicLatency = LiveOutDepth - SU->getDepth();
1288 
1289  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1290  if (LiveInHeight > LiveOutHeight) {
1291  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1292  CyclicLatency = LiveInHeight - LiveOutHeight;
1293  } else
1294  CyclicLatency = 0;
1295 
1296  DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1297  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1298  if (CyclicLatency > MaxCyclicLatency)
1299  MaxCyclicLatency = CyclicLatency;
1300  }
1301  }
1302  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1303  return MaxCyclicLatency;
1304 }
1305 
1306 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1307 /// the Top RP tracker in case the region beginning has changed.
1309  ArrayRef<SUnit*> BotRoots) {
1310  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1311  if (ShouldTrackPressure) {
1312  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1314  }
1315 }
1316 
1317 /// Move an instruction and update register pressure.
1318 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1319  // Move the instruction to its new location in the instruction stream.
1320  MachineInstr *MI = SU->getInstr();
1321 
1322  if (IsTopNode) {
1323  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1324  if (&*CurrentTop == MI)
1326  else {
1328  TopRPTracker.setPos(MI);
1329  }
1330 
1331  if (ShouldTrackPressure) {
1332  // Update top scheduled pressure.
1333  RegisterOperands RegOpers;
1334  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1335  if (ShouldTrackLaneMasks) {
1336  // Adjust liveness and add missing dead+read-undef flags.
1337  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1338  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1339  } else {
1340  // Adjust for missing dead-def flags.
1341  RegOpers.detectDeadDefs(*MI, *LIS);
1342  }
1343 
1344  TopRPTracker.advance(RegOpers);
1345  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1346  DEBUG(
1347  dbgs() << "Top Pressure:\n";
1349  );
1350 
1352  }
1353  } else {
1354  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1355  MachineBasicBlock::iterator priorII =
1357  if (&*priorII == MI)
1358  CurrentBottom = priorII;
1359  else {
1360  if (&*CurrentTop == MI) {
1361  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1363  }
1365  CurrentBottom = MI;
1366  }
1367  if (ShouldTrackPressure) {
1368  RegisterOperands RegOpers;
1369  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1370  if (ShouldTrackLaneMasks) {
1371  // Adjust liveness and add missing dead+read-undef flags.
1372  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1373  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1374  } else {
1375  // Adjust for missing dead-def flags.
1376  RegOpers.detectDeadDefs(*MI, *LIS);
1377  }
1378 
1381  BotRPTracker.recede(RegOpers, &LiveUses);
1382  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1383  DEBUG(
1384  dbgs() << "Bottom Pressure:\n";
1386  );
1387 
1389  updatePressureDiffs(LiveUses);
1390  }
1391  }
1392 }
1393 
1394 //===----------------------------------------------------------------------===//
1395 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1396 //===----------------------------------------------------------------------===//
1397 
1398 namespace {
1399 /// \brief Post-process the DAG to create cluster edges between neighboring
1400 /// loads or between neighboring stores.
1401 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1402  struct MemOpInfo {
1403  SUnit *SU;
1404  unsigned BaseReg;
1405  int64_t Offset;
1406  MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1407  : SU(su), BaseReg(reg), Offset(ofs) {}
1408 
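  // Order by base register, then offset, then original node order, so that
  // memops sharing a base register become adjacent after sorting.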
1409  bool operator<(const MemOpInfo&RHS) const {
1410  return std::tie(BaseReg, Offset, SU->NodeNum) <
1411  std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1412  }
1413  };
1414 
1415  const TargetInstrInfo *TII;
1416  const TargetRegisterInfo *TRI;
1417  bool IsLoad;
1418 
1419 public:
1420  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1421  const TargetRegisterInfo *tri, bool IsLoad)
1422  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1423 
1424  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1425 
1426 protected:
1427  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1428 };
1429 
1430 class StoreClusterMutation : public BaseMemOpClusterMutation {
1431 public:
1432  StoreClusterMutation(const TargetInstrInfo *tii,
1433  const TargetRegisterInfo *tri)
1434  : BaseMemOpClusterMutation(tii, tri, false) {}
1435 };
1436 
1437 class LoadClusterMutation : public BaseMemOpClusterMutation {
1438 public:
1439  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1440  : BaseMemOpClusterMutation(tii, tri, true) {}
1441 };
1442 } // anonymous
1443 
1444 namespace llvm {
1445 
1446 std::unique_ptr<ScheduleDAGMutation>
1448  const TargetRegisterInfo *TRI) {
1449  return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
1450  : nullptr;
1451 }
1452 
1453 std::unique_ptr<ScheduleDAGMutation>
1455  const TargetRegisterInfo *TRI) {
1456  return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
1457  : nullptr;
1458 }
1459 
1460 } // namespace llvm
1461 
1462 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1463  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1464  SmallVector<MemOpInfo, 32> MemOpRecords;
1465  for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
1466  SUnit *SU = MemOps[Idx];
1467  unsigned BaseReg;
1468  int64_t Offset;
1469  if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1470  MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1471  }
1472  if (MemOpRecords.size() < 2)
1473  return;
1474 
1475  std::sort(MemOpRecords.begin(), MemOpRecords.end());
1476  unsigned ClusterLength = 1;
1477  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1478  if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
1479  ClusterLength = 1;
1480  continue;
1481  }
1482 
1483  SUnit *SUa = MemOpRecords[Idx].SU;
1484  SUnit *SUb = MemOpRecords[Idx+1].SU;
1485  if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
1486  ClusterLength) &&
1487  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1488  DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1489  << SUb->NodeNum << ")\n");
1490  // Copy successor edges from SUa to SUb. Interleaving computation
1491  // dependent on SUa can prevent load combining due to register reuse.
1492  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1493  // loads should have effectively the same inputs.
1495  SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1496  if (SI->getSUnit() == SUb)
1497  continue;
1498  DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1499  DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1500  }
1501  ++ClusterLength;
1502  } else
1503  ClusterLength = 1;
1504  }
1505 }
1506 
1507 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1509 
1510  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1511 
1512  // Map DAG NodeNum to store chain ID.
1513  DenseMap<unsigned, unsigned> StoreChainIDs;
1514  // Map each store chain to a set of dependent MemOps.
1515  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1516  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1517  SUnit *SU = &DAG->SUnits[Idx];
1518  if ((IsLoad && !SU->getInstr()->mayLoad()) ||
1519  (!IsLoad && !SU->getInstr()->mayStore()))
1520  continue;
1521 
1522  unsigned ChainPredID = DAG->SUnits.size();
1524  PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1525  if (PI->isCtrl()) {
1526  ChainPredID = PI->getSUnit()->NodeNum;
1527  break;
1528  }
1529  }
1530  // Check if this chain-like pred has been seen
1531  // before. ChainPredID==MaxNodeID at the top of the schedule.
1532  unsigned NumChains = StoreChainDependents.size();
1533  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1534  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1535  if (Result.second)
1536  StoreChainDependents.resize(NumChains + 1);
1537  StoreChainDependents[Result.first->second].push_back(SU);
1538  }
1539 
1540  // Iterate over the store chains.
1541  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1542  clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
1543 }
1544 
1545 //===----------------------------------------------------------------------===//
1546 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1547 //===----------------------------------------------------------------------===//
1548 
1549 namespace {
1550 /// \brief Post-process the DAG to create cluster edges between instructions
1551 /// that may be fused by the processor into a single operation.
1552 class MacroFusion : public ScheduleDAGMutation {
1553  const TargetInstrInfo &TII;
1554 public:
1555  MacroFusion(const TargetInstrInfo &TII)
1556  : TII(TII) {}
1557 
1558  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1559 };
1560 } // anonymous
1561 
1562 namespace llvm {
1563 
1564 std::unique_ptr<ScheduleDAGMutation>
1566  return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
1567 }
1568 
1569 } // namespace llvm
1570 
1571 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1572 /// fused operations.
1573 void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
1574  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1575 
1576  // For now, assume targets can only fuse with the branch.
1577  SUnit &ExitSU = DAG->ExitSU;
1578  MachineInstr *Branch = ExitSU.getInstr();
1579  if (!Branch)
1580  return;
1581 
1582  for (SDep &PredDep : ExitSU.Preds) {
1583  if (PredDep.isWeak())
1584  continue;
1585  SUnit &SU = *PredDep.getSUnit();
1586  MachineInstr &Pred = *SU.getInstr();
1587  if (!TII.shouldScheduleAdjacent(Pred, *Branch))
1588  continue;
1589 
1590  // Create a single weak edge from SU to ExitSU. The only effect is to cause
1591  // bottom-up scheduling to heavily prioritize the clustered SU. There is no
1592  // need to copy predecessor edges from ExitSU to SU, since top-down
1593  // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1594  // of SU, we could create an artificial edge from the deepest root, but it
1595  // hasn't been needed yet.
1596  bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
1597  (void)Success;
1598  assert(Success && "No DAG nodes should be reachable from ExitSU");
1599 
1600  // Adjust latency of data deps between the nodes.
1601  for (SDep &PredDep : ExitSU.Preds) {
1602  if (PredDep.getSUnit() == &SU)
1603  PredDep.setLatency(0);
1604  }
1605  for (SDep &SuccDep : SU.Succs) {
1606  if (SuccDep.getSUnit() == &ExitSU)
1607  SuccDep.setLatency(0);
1608  }
1609 
1610  DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
1611  break;
1612  }
1613 }
1614 
1615 //===----------------------------------------------------------------------===//
1616 // CopyConstrain - DAG post-processing to encourage copy elimination.
1617 //===----------------------------------------------------------------------===//
1618 
1619 namespace {
1620 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1621 /// the one use that defines the copy's source vreg, most likely an induction
1622 /// variable increment.
1623 class CopyConstrain : public ScheduleDAGMutation {
1624  // Transient state.
1625  SlotIndex RegionBeginIdx;
1626  // RegionEndIdx is the slot index of the last non-debug instruction in the
1627  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1628  SlotIndex RegionEndIdx;
1629 public:
1630  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1631 
1632  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1633 
1634 protected:
1635  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1636 };
1637 } // anonymous
1638 
1639 namespace llvm {
1640 
1641 std::unique_ptr<ScheduleDAGMutation>
1643  const TargetRegisterInfo *TRI) {
1644  return make_unique<CopyConstrain>(TII, TRI);
1645 }
1646 
1647 } // namespace llvm
1648 
1649 /// constrainLocalCopy handles two possibilities:
1650 /// 1) Local src:
1651 /// I0: = dst
1652 /// I1: src = ...
1653 /// I2: = dst
1654 /// I3: dst = src (copy)
1655 /// (create pred->succ edges I0->I1, I2->I1)
1656 ///
1657 /// 2) Local copy:
1658 /// I0: dst = src (copy)
1659 /// I1: = dst
1660 /// I2: src = ...
1661 /// I3: = dst
1662 /// (create pred->succ edges I1->I2, I3->I2)
1663 ///
1664 /// Although the MachineScheduler is currently constrained to single blocks,
1665 /// this algorithm should handle extended blocks. An EBB is a set of
1666 /// contiguously numbered blocks such that the previous block in the EBB is
1667 /// always the single predecessor.
1668 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1669  LiveIntervals *LIS = DAG->getLIS();
1670  MachineInstr *Copy = CopySU->getInstr();
1671 
1672  // Check for pure vreg copies.
1673  const MachineOperand &SrcOp = Copy->getOperand(1);
1674  unsigned SrcReg = SrcOp.getReg();
1675  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1676  return;
1677 
1678  const MachineOperand &DstOp = Copy->getOperand(0);
1679  unsigned DstReg = DstOp.getReg();
1680  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1681  return;
1682 
1683  // Check if either the dest or source is local. If it's live across a back
1684  // edge, it's not local. Note that if both vregs are live across the back
1685  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1686  // If both the copy's source and dest are local live intervals, then we
1687  // should treat the dest as the global for the purpose of adding
1688  // constraints. This adds edges from source's other uses to the copy.
1689  unsigned LocalReg = SrcReg;
1690  unsigned GlobalReg = DstReg;
1691  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1692  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1693  LocalReg = DstReg;
1694  GlobalReg = SrcReg;
1695  LocalLI = &LIS->getInterval(LocalReg);
1696  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1697  return;
1698  }
1699  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1700 
1701  // Find the global segment after the start of the local LI.
1702  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1703  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1704  // local live range. We could create edges from other global uses to the local
1705  // start, but the coalescer should have already eliminated these cases, so
1706  // don't bother dealing with it.
1707  if (GlobalSegment == GlobalLI->end())
1708  return;
1709 
1710  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1711  // returned the next global segment. But if GlobalSegment overlaps with
1712  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1713  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1714  if (GlobalSegment->contains(LocalLI->beginIndex()))
1715  ++GlobalSegment;
1716 
1717  if (GlobalSegment == GlobalLI->end())
1718  return;
1719 
1720  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1721  if (GlobalSegment != GlobalLI->begin()) {
1722  // Two address defs have no hole.
1723  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1724  GlobalSegment->start)) {
1725  return;
1726  }
1727  // If the prior global segment may be defined by the same two-address
1728  // instruction that also defines LocalLI, then we can't make a hole here.
1729  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1730  LocalLI->beginIndex())) {
1731  return;
1732  }
1733  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1734  // it would be a disconnected component in the live range.
1735  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1736  "Disconnected LRG within the scheduling region.");
1737  }
1738  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1739  if (!GlobalDef)
1740  return;
1741 
1742  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1743  if (!GlobalSU)
1744  return;
1745 
1746  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1747  // constraining the uses of the last local def to precede GlobalDef.
1748  SmallVector<SUnit*,8> LocalUses;
1749  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1750  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1751  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1752  for (SmallVectorImpl<SDep>::const_iterator
1753  I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1754  I != E; ++I) {
1755  if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1756  continue;
1757  if (I->getSUnit() == GlobalSU)
1758  continue;
1759  if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1760  return;
1761  LocalUses.push_back(I->getSUnit());
1762  }
1763  // Open the top of the GlobalLI hole by constraining any earlier global uses
1764  // to precede the start of LocalLI.
1765  SmallVector<SUnit*,8> GlobalUses;
1766  MachineInstr *FirstLocalDef =
1767  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1768  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1769  for (SmallVectorImpl<SDep>::const_iterator
1770  I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1771  if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1772  continue;
1773  if (I->getSUnit() == FirstLocalSU)
1774  continue;
1775  if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1776  return;
1777  GlobalUses.push_back(I->getSUnit());
1778  }
1779  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1780  // Add the weak edges.
1781  for (SmallVectorImpl<SUnit*>::const_iterator
1782  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1783  DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1784  << GlobalSU->NodeNum << ")\n");
1785  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1786  }
1787  for (SmallVectorImpl<SUnit*>::const_iterator
1788  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1789  DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1790  << FirstLocalSU->NodeNum << ")\n");
1791  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1792  }
1793 }
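// Editorial note (not part of the original source): the SDep::Weak edges added
// above are soft ordering hints rather than hard data dependencies. They only
// bias the scheduler's Weak heuristic (via WeakPredsLeft/WeakSuccsLeft, see
// getWeakLeft below) toward keeping the copy next to the value it renames, and
// DAG->canAddEdge() rejects any edge that would create a cycle.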
1794 
1795 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1796 /// copy elimination.
1797 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1798  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1799  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1800 
1801  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1802  if (FirstPos == DAG->end())
1803  return;
1804  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1805  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1806  *priorNonDebug(DAG->end(), DAG->begin()));
1807 
1808  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1809  SUnit *SU = &DAG->SUnits[Idx];
1810  if (!SU->getInstr()->isCopy())
1811  continue;
1812 
1813  constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1814  }
1815 }
1816 
1817 //===----------------------------------------------------------------------===//
1818 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1819 // and possibly other custom schedulers.
1820 //===----------------------------------------------------------------------===//
1821 
1822 static const unsigned InvalidCycle = ~0U;
1823 
1824 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1825 
1826 void SchedBoundary::reset() {
1827  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1828  // Destroying and reconstructing it is very expensive though. So keep
1829  // invalid, placeholder HazardRecs.
1830  if (HazardRec && HazardRec->isEnabled()) {
1831  delete HazardRec;
1832  HazardRec = nullptr;
1833  }
1834  Available.clear();
1835  Pending.clear();
1836  CheckPending = false;
1837  CurrCycle = 0;
1838  CurrMOps = 0;
1839  MinReadyCycle = UINT_MAX;
1840  ExpectedLatency = 0;
1841  DependentLatency = 0;
1842  RetiredMOps = 0;
1843  MaxExecutedResCount = 0;
1844  ZoneCritResIdx = 0;
1845  IsResourceLimited = false;
1846  ReservedCycles.clear();
1847 #ifndef NDEBUG
1848  // Track the maximum number of stall cycles that could arise either from the
1849  // latency of a DAG edge or the number of cycles that a processor resource is
1850  // reserved (SchedBoundary::ReservedCycles).
1851  MaxObservedStall = 0;
1852 #endif
1853  // Reserve a zero-count for invalid CritResIdx.
1854  ExecutedResCounts.resize(1);
1855  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1856 }
1857 
1858 void SchedRemainder::
1859 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1860  reset();
1861  if (!SchedModel->hasInstrSchedModel())
1862  return;
1863  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1864  for (std::vector<SUnit>::iterator
1865  I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1866  const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1867  RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1868  * SchedModel->getMicroOpFactor();
1869  for (TargetSchedModel::ProcResIter
1870  PI = SchedModel->getWriteProcResBegin(SC),
1871  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1872  unsigned PIdx = PI->ProcResourceIdx;
1873  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1874  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1875  }
1876  }
1877 }
1878 
1879 void SchedBoundary::
1880 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1881  reset();
1882  DAG = dag;
1883  SchedModel = smodel;
1884  Rem = rem;
1885  if (SchedModel->hasInstrSchedModel()) {
1886  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1887  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1888  }
1889 }
1890 
1891 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1892 /// these "soft stalls" differently than the hard stall cycles based on CPU
1893 /// resources and computed by checkHazard(). A fully in-order model
1894 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1895 /// available for scheduling until they are ready. However, a weaker in-order
1896 /// model may use this for heuristics. For example, if a processor has in-order
1897 /// behavior when reading certain resources, this may come into play.
1898 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1899  if (!SU->isUnbuffered)
1900  return 0;
1901 
1902  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1903  if (ReadyCycle > CurrCycle)
1904  return ReadyCycle - CurrCycle;
1905  return 0;
1906 }
1907 
1908 /// Compute the next cycle at which the given processor resource can be
1909 /// scheduled.
1910 unsigned SchedBoundary::
1911 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1912  unsigned NextUnreserved = ReservedCycles[PIdx];
1913  // If this resource has never been used, always return cycle zero.
1914  if (NextUnreserved == InvalidCycle)
1915  return 0;
1916  // For bottom-up scheduling add the cycles needed for the current operation.
1917  if (!isTop())
1918  NextUnreserved += Cycles;
1919  return NextUnreserved;
1920 }
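// Editorial example (not part of the original source): if ReservedCycles[PIdx]
// is 5 and the zone schedules bottom-up, an operation that holds the resource
// for 2 cycles gets 5 + 2 = 7; scheduling top-down it simply gets 5. A stored
// value of InvalidCycle means the resource was never reserved, so cycle 0 is
// returned.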
1921 
1922 /// Does this SU have a hazard within the current instruction group.
1923 ///
1924 /// The scheduler supports two modes of hazard recognition. The first is the
1925 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1926 /// supports highly complicated in-order reservation tables
1927 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1928 ///
1929 /// The second is a streamlined mechanism that checks for hazards based on
1930 /// simple counters that the scheduler itself maintains. It explicitly checks
1931 /// for instruction dispatch limitations, including the number of micro-ops that
1932 /// can dispatch per cycle.
1933 ///
1934 /// TODO: Also check whether the SU must start a new group.
1935 bool SchedBoundary::checkHazard(SUnit *SU) {
1936  if (HazardRec->isEnabled()
1937  && HazardRec->getHazardType(SU) > ScheduleHazardRecognizer::NoHazard) {
1938  return true;
1939  }
1940  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1941  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1942  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1943  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1944  return true;
1945  }
1947  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1948  for (TargetSchedModel::ProcResIter
1949  PI = SchedModel->getWriteProcResBegin(SC),
1950  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1951  unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
1952  if (NRCycle > CurrCycle) {
1953 #ifndef NDEBUG
1954  MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
1955 #endif
1956  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1957  << SchedModel->getResourceName(PI->ProcResourceIdx)
1958  << "=" << NRCycle << "c\n");
1959  return true;
1960  }
1961  }
1962  }
1963  return false;
1964 }
1965 
1966 // Find the unscheduled node in ReadySUs with the highest latency.
1967 unsigned SchedBoundary::
1968 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1969  SUnit *LateSU = nullptr;
1970  unsigned RemLatency = 0;
1971  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1972  I != E; ++I) {
1973  unsigned L = getUnscheduledLatency(*I);
1974  if (L > RemLatency) {
1975  RemLatency = L;
1976  LateSU = *I;
1977  }
1978  }
1979  if (LateSU) {
1980  DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1981  << LateSU->NodeNum << ") " << RemLatency << "c\n");
1982  }
1983  return RemLatency;
1984 }
1985 
1986 // Count resources in this zone and the remaining unscheduled
1987 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1988 // resource index, or zero if the zone is issue limited.
1989 unsigned SchedBoundary::
1990 getOtherResourceCount(unsigned &OtherCritIdx) {
1991  OtherCritIdx = 0;
1992  if (!SchedModel->hasInstrSchedModel())
1993  return 0;
1994 
1995  unsigned OtherCritCount = Rem->RemIssueCount
1996  + (RetiredMOps * SchedModel->getMicroOpFactor());
1997  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
1998  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1999  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2000  PIdx != PEnd; ++PIdx) {
2001  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2002  if (OtherCount > OtherCritCount) {
2003  OtherCritCount = OtherCount;
2004  OtherCritIdx = PIdx;
2005  }
2006  }
2007  if (OtherCritIdx) {
2008  DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
2009  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2010  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2011  }
2012  return OtherCritCount;
2013 }
2014 
2015 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2016  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2017 
2018 #ifndef NDEBUG
2019  // ReadyCycle has been bumped up to the CurrCycle when this node was
2020  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2021  // scheduling, so may now be greater than ReadyCycle.
2022  if (ReadyCycle > CurrCycle)
2023  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2024 #endif
2025 
2026  if (ReadyCycle < MinReadyCycle)
2027  MinReadyCycle = ReadyCycle;
2028 
2029  // Check for interlocks first. For the purpose of other heuristics, an
2030  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2031  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2032  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2033  Available.size() >= ReadyListLimit)
2034  Pending.push(SU);
2035  else
2036  Available.push(SU);
2037 }
2038 
2039 /// Move the boundary of scheduled code by one cycle.
2040 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2041  if (SchedModel->getMicroOpBufferSize() == 0) {
2042  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
2043  if (MinReadyCycle > NextCycle)
2044  NextCycle = MinReadyCycle;
2045  }
2046  // Update the current micro-ops, which will issue in the next cycle.
2047  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2048  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2049 
2050  // Decrement DependentLatency based on the next cycle.
2051  if ((NextCycle - CurrCycle) > DependentLatency)
2052  DependentLatency = 0;
2053  else
2054  DependentLatency -= (NextCycle - CurrCycle);
2055 
2056  if (!HazardRec->isEnabled()) {
2057  // Bypass HazardRec virtual calls.
2058  CurrCycle = NextCycle;
2059  } else {
2060  // Bypass getHazardType calls in case of long latency.
2061  for (; CurrCycle != NextCycle; ++CurrCycle) {
2062  if (isTop())
2063  HazardRec->AdvanceCycle();
2064  else
2065  HazardRec->RecedeCycle();
2066  }
2067  }
2068  CheckPending = true;
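  // Editorial note (not part of the original source): the computation below
  // marks the zone resource limited when the scaled count of its critical
  // resource exceeds the scheduled latency (both in latency-factor units) by
  // more than one cycle's worth of work; otherwise latency is the limiter.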
2069  unsigned LFactor = SchedModel->getLatencyFactor();
2070  IsResourceLimited =
2071  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2072  > (int)LFactor;
2073 
2074  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2075 }
2076 
2077 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2078  ExecutedResCounts[PIdx] += Count;
2079  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2080  MaxExecutedResCount = ExecutedResCounts[PIdx];
2081 }
2082 
2083 /// Add the given processor resource to this scheduled zone.
2084 ///
2085 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2086 /// during which this resource is consumed.
2087 ///
2088 /// \return the next cycle at which the instruction may execute without
2089 /// oversubscribing resources.
2090 unsigned SchedBoundary::
2091 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2092  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2093  unsigned Count = Factor * Cycles;
2094  DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
2095  << " +" << Cycles << "x" << Factor << "u\n");
2096 
2097  // Update Executed resources counts.
2098  incExecutedResources(PIdx, Count);
2099  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2100  Rem->RemainingCounts[PIdx] -= Count;
2101 
2102  // Check if this resource exceeds the current critical resource. If so, it
2103  // becomes the critical resource.
2104  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2105  ZoneCritResIdx = PIdx;
2106  DEBUG(dbgs() << " *** Critical resource "
2107  << SchedModel->getResourceName(PIdx) << ": "
2108  << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2109  }
2110  // For reserved resources, record the highest cycle using the resource.
2111  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2112  if (NextAvailable > CurrCycle) {
2113  DEBUG(dbgs() << " Resource conflict: "
2114  << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2115  << NextAvailable << "\n");
2116  }
2117  return NextAvailable;
2118 }
2119 
2120 /// Move the boundary of scheduled code by one SUnit.
2121 void SchedBoundary::bumpNode(SUnit *SU) {
2122  // Update the reservation table.
2123  if (HazardRec->isEnabled()) {
2124  if (!isTop() && SU->isCall) {
2125  // Calls are scheduled with their preceding instructions. For bottom-up
2126  // scheduling, clear the pipeline state before emitting.
2127  HazardRec->Reset();
2128  }
2129  HazardRec->EmitInstruction(SU);
2130  }
2131  // checkHazard should prevent scheduling multiple instructions per cycle that
2132  // exceed the issue width.
2133  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2134  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2135  assert(
2136  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2137  "Cannot schedule this instruction's MicroOps in the current cycle.");
2138 
2139  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2140  DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2141 
2142  unsigned NextCycle = CurrCycle;
2143  switch (SchedModel->getMicroOpBufferSize()) {
2144  case 0:
2145  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2146  break;
2147  case 1:
2148  if (ReadyCycle > NextCycle) {
2149  NextCycle = ReadyCycle;
2150  DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2151  }
2152  break;
2153  default:
2154  // We don't currently model the OOO reorder buffer, so consider all
2155  // scheduled MOps to be "retired". We do loosely model in-order resource
2156  // latency. If this instruction uses an in-order resource, account for any
2157  // likely stall cycles.
2158  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2159  NextCycle = ReadyCycle;
2160  break;
2161  }
2162  RetiredMOps += IncMOps;
2163 
2164  // Update resource counts and critical resource.
2165  if (SchedModel->hasInstrSchedModel()) {
2166  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2167  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2168  Rem->RemIssueCount -= DecRemIssue;
2169  if (ZoneCritResIdx) {
2170  // Scale scheduled micro-ops for comparing with the critical resource.
2171  unsigned ScaledMOps =
2172  RetiredMOps * SchedModel->getMicroOpFactor();
2173 
2174  // If scaled micro-ops are now more than the previous critical resource by
2175  // a full cycle, then micro-ops issue becomes critical.
2176  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2177  >= (int)SchedModel->getLatencyFactor()) {
2178  ZoneCritResIdx = 0;
2179  DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2180  << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2181  }
2182  }
2183  for (TargetSchedModel::ProcResIter
2184  PI = SchedModel->getWriteProcResBegin(SC),
2185  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2186  unsigned RCycle =
2187  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2188  if (RCycle > NextCycle)
2189  NextCycle = RCycle;
2190  }
2191  if (SU->hasReservedResource) {
2192  // For reserved resources, record the highest cycle using the resource.
2193  // For top-down scheduling, this is the cycle in which we schedule this
2194  // instruction plus the number of cycles the operation reserves the
2195  // resource. For bottom-up, it is simply the instruction's cycle.
2196  for (TargetSchedModel::ProcResIter
2197  PI = SchedModel->getWriteProcResBegin(SC),
2198  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2199  unsigned PIdx = PI->ProcResourceIdx;
2200  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2201  if (isTop()) {
2202  ReservedCycles[PIdx] =
2203  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2204  }
2205  else
2206  ReservedCycles[PIdx] = NextCycle;
2207  }
2208  }
2209  }
2210  }
2211  // Update ExpectedLatency and DependentLatency.
2212  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2213  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2214  if (SU->getDepth() > TopLatency) {
2215  TopLatency = SU->getDepth();
2216  DEBUG(dbgs() << " " << Available.getName()
2217  << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2218  }
2219  if (SU->getHeight() > BotLatency) {
2220  BotLatency = SU->getHeight();
2221  DEBUG(dbgs() << " " << Available.getName()
2222  << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2223  }
2224  // If we stall for any reason, bump the cycle.
2225  if (NextCycle > CurrCycle) {
2226  bumpCycle(NextCycle);
2227  } else {
2228  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2229  // resource limited. If a stall occurred, bumpCycle does this.
2230  unsigned LFactor = SchedModel->getLatencyFactor();
2231  IsResourceLimited =
2232  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2233  > (int)LFactor;
2234  }
2235  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2236  // resets CurrMOps. Loop to handle instructions with more MOps than can issue
2237  // in one cycle. Since we commonly reach the max MOps here, opportunistically
2238  // bump the cycle to avoid uselessly checking everything in the readyQ.
2239  CurrMOps += IncMOps;
2240  while (CurrMOps >= SchedModel->getIssueWidth()) {
2241  DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2242  << " at cycle " << CurrCycle << '\n');
2243  bumpCycle(++NextCycle);
2244  }
2245  DEBUG(dumpScheduledState());
2246 }
2247 
2248 /// Release pending ready nodes into the available queue. This makes them
2249 /// visible to heuristics.
2250 void SchedBoundary::releasePending() {
2251  // If the available queue is empty, it is safe to reset MinReadyCycle.
2252  if (Available.empty())
2253  MinReadyCycle = UINT_MAX;
2254 
2255  // Check to see if any of the pending instructions are ready to issue. If
2256  // so, add them to the available queue.
2257  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2258  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2259  SUnit *SU = *(Pending.begin()+i);
2260  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2261 
2262  if (ReadyCycle < MinReadyCycle)
2263  MinReadyCycle = ReadyCycle;
2264 
2265  if (!IsBuffered && ReadyCycle > CurrCycle)
2266  continue;
2267 
2268  if (checkHazard(SU))
2269  continue;
2270 
2271  if (Available.size() >= ReadyListLimit)
2272  break;
2273 
2274  Available.push(SU);
2275  Pending.remove(Pending.begin()+i);
2276  --i; --e;
2277  }
2278  CheckPending = false;
2279 }
2280 
2281 /// Remove SU from the ready set for this boundary.
2282 void SchedBoundary::removeReady(SUnit *SU) {
2283  if (Available.isInQueue(SU))
2284  Available.remove(Available.find(SU));
2285  else {
2286  assert(Pending.isInQueue(SU) && "bad ready count");
2287  Pending.remove(Pending.find(SU));
2288  }
2289 }
2290 
2291 /// If this queue only has one ready candidate, return it. As a side effect,
2292 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2293 /// one node is ready. If multiple instructions are ready, return NULL.
2294 SUnit *SchedBoundary::pickOnlyChoice() {
2295  if (CheckPending)
2296  releasePending();
2297 
2298  if (CurrMOps > 0) {
2299  // Defer any ready instrs that now have a hazard.
2300  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2301  if (checkHazard(*I)) {
2302  Pending.push(*I);
2303  I = Available.remove(I);
2304  continue;
2305  }
2306  ++I;
2307  }
2308  }
2309  for (unsigned i = 0; Available.empty(); ++i) {
2310 // FIXME: Re-enable assert once PR20057 is resolved.
2311 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2312 // "permanent hazard");
2313  (void)i;
2314  bumpCycle(CurrCycle + 1);
2315  releasePending();
2316  }
2317 
2318  DEBUG(Pending.dump());
2319  DEBUG(Available.dump());
2320 
2321  if (Available.size() == 1)
2322  return *Available.begin();
2323  return nullptr;
2324 }
2325 
2326 #ifndef NDEBUG
2327 // This is useful information to dump after bumpNode.
2328 // Note that the Queue contents are more useful before pickNodeFromQueue.
2329 void SchedBoundary::dumpScheduledState() {
2330  unsigned ResFactor;
2331  unsigned ResCount;
2332  if (ZoneCritResIdx) {
2333  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2334  ResCount = getResourceCount(ZoneCritResIdx);
2335  } else {
2336  ResFactor = SchedModel->getMicroOpFactor();
2337  ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2338  }
2339  unsigned LFactor = SchedModel->getLatencyFactor();
2340  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2341  << " Retired: " << RetiredMOps;
2342  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2343  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2344  << ResCount / ResFactor << " "
2345  << SchedModel->getResourceName(ZoneCritResIdx)
2346  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2347  << (IsResourceLimited ? " - Resource" : " - Latency")
2348  << " limited.\n";
2349 }
2350 #endif
2351 
2352 //===----------------------------------------------------------------------===//
2353 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2354 //===----------------------------------------------------------------------===//
2355 
2356 void GenericSchedulerBase::SchedCandidate::
2357 initResourceDelta(const ScheduleDAGMI *DAG,
2358  const TargetSchedModel *SchedModel) {
2359  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2360  return;
2361 
2362  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2363  for (TargetSchedModel::ProcResIter
2364  PI = SchedModel->getWriteProcResBegin(SC),
2365  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2366  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2367  ResDelta.CritResources += PI->Cycles;
2368  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2369  ResDelta.DemandedResources += PI->Cycles;
2370  }
2371 }
2372 
2373 /// Set the CandPolicy given a scheduling zone given the current resources and
2374 /// latencies inside and outside the zone.
2375 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2376  SchedBoundary &CurrZone,
2377  SchedBoundary *OtherZone) {
2378  // Apply preemptive heuristics based on the total latency and resources
2379  // inside and outside this zone. Potential stalls should be considered before
2380  // following this policy.
2381 
2382  // Compute remaining latency. We need this both to determine whether the
2383  // overall schedule has become latency-limited and whether the instructions
2384  // outside this zone are resource or latency limited.
2385  //
2386  // The "dependent" latency is updated incrementally during scheduling as the
2387  // max height/depth of scheduled nodes minus the cycles since it was
2388  // scheduled:
2389  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2390  //
2391  // The "independent" latency is the max ready queue depth:
2392  // ILat = max N.depth for N in Available|Pending
2393  //
2394  // RemainingLatency is the greater of independent and dependent latency.
2395  unsigned RemLatency = CurrZone.getDependentLatency();
2396  RemLatency = std::max(RemLatency,
2397  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2398  RemLatency = std::max(RemLatency,
2399  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2400 
2401  // Compute the critical resource outside the zone.
2402  unsigned OtherCritIdx = 0;
2403  unsigned OtherCount =
2404  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2405 
2406  bool OtherResLimited = false;
2407  if (SchedModel->hasInstrSchedModel()) {
2408  unsigned LFactor = SchedModel->getLatencyFactor();
2409  OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2410  }
2411  // Schedule aggressively for latency in PostRA mode. We don't check for
2412  // acyclic latency during PostRA, and highly out-of-order processors will
2413  // skip PostRA scheduling.
2414  if (!OtherResLimited) {
2415  if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2416  Policy.ReduceLatency |= true;
2417  DEBUG(dbgs() << " " << CurrZone.Available.getName()
2418  << " RemainingLatency " << RemLatency << " + "
2419  << CurrZone.getCurrCycle() << "c > CritPath "
2420  << Rem.CriticalPath << "\n");
2421  }
2422  }
2423  // If the same resource is limiting inside and outside the zone, do nothing.
2424  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2425  return;
2426 
2427  DEBUG(
2428  if (CurrZone.isResourceLimited()) {
2429  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2430  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2431  << "\n";
2432  }
2433  if (OtherResLimited)
2434  dbgs() << " RemainingLimit: "
2435  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2436  if (!CurrZone.isResourceLimited() && !OtherResLimited)
2437  dbgs() << " Latency limited both directions.\n");
2438 
2439  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2440  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2441 
2442  if (OtherResLimited)
2443  Policy.DemandResIdx = OtherCritIdx;
2444 }
2445 
2446 #ifndef NDEBUG
2447 const char *GenericSchedulerBase::getReasonStr(
2448  GenericSchedulerBase::CandReason Reason) {
2449  switch (Reason) {
2450  case NoCand: return "NOCAND ";
2451  case Only1: return "ONLY1 ";
2452  case PhysRegCopy: return "PREG-COPY ";
2453  case RegExcess: return "REG-EXCESS";
2454  case RegCritical: return "REG-CRIT ";
2455  case Stall: return "STALL ";
2456  case Cluster: return "CLUSTER ";
2457  case Weak: return "WEAK ";
2458  case RegMax: return "REG-MAX ";
2459  case ResourceReduce: return "RES-REDUCE";
2460  case ResourceDemand: return "RES-DEMAND";
2461  case TopDepthReduce: return "TOP-DEPTH ";
2462  case TopPathReduce: return "TOP-PATH ";
2463  case BotHeightReduce:return "BOT-HEIGHT";
2464  case BotPathReduce: return "BOT-PATH ";
2465  case NextDefUse: return "DEF-USE ";
2466  case NodeOrder: return "ORDER ";
2467  };
2468  llvm_unreachable("Unknown reason!");
2469 }
2470 
2471 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2472  PressureChange P;
2473  unsigned ResIdx = 0;
2474  unsigned Latency = 0;
2475  switch (Cand.Reason) {
2476  default:
2477  break;
2478  case RegExcess:
2479  P = Cand.RPDelta.Excess;
2480  break;
2481  case RegCritical:
2482  P = Cand.RPDelta.CriticalMax;
2483  break;
2484  case RegMax:
2485  P = Cand.RPDelta.CurrentMax;
2486  break;
2487  case ResourceReduce:
2488  ResIdx = Cand.Policy.ReduceResIdx;
2489  break;
2490  case ResourceDemand:
2491  ResIdx = Cand.Policy.DemandResIdx;
2492  break;
2493  case TopDepthReduce:
2494  Latency = Cand.SU->getDepth();
2495  break;
2496  case TopPathReduce:
2497  Latency = Cand.SU->getHeight();
2498  break;
2499  case BotHeightReduce:
2500  Latency = Cand.SU->getHeight();
2501  break;
2502  case BotPathReduce:
2503  Latency = Cand.SU->getDepth();
2504  break;
2505  }
2506  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2507  if (P.isValid())
2508  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2509  << ":" << P.getUnitInc() << " ";
2510  else
2511  dbgs() << " ";
2512  if (ResIdx)
2513  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2514  else
2515  dbgs() << " ";
2516  if (Latency)
2517  dbgs() << " " << Latency << " cycles ";
2518  else
2519  dbgs() << " ";
2520  dbgs() << '\n';
2521 }
2522 #endif
2523 
2524 /// Return true if this heuristic determines order.
2525 static bool tryLess(int TryVal, int CandVal,
2526  GenericSchedulerBase::SchedCandidate &TryCand,
2527  GenericSchedulerBase::SchedCandidate &Cand,
2528  GenericSchedulerBase::CandReason Reason) {
2529  if (TryVal < CandVal) {
2530  TryCand.Reason = Reason;
2531  return true;
2532  }
2533  if (TryVal > CandVal) {
2534  if (Cand.Reason > Reason)
2535  Cand.Reason = Reason;
2536  return true;
2537  }
2538  return false;
2539 }
2540 
2541 static bool tryGreater(int TryVal, int CandVal,
2542  GenericSchedulerBase::SchedCandidate &TryCand,
2543  GenericSchedulerBase::SchedCandidate &Cand,
2544  GenericSchedulerBase::CandReason Reason) {
2545  if (TryVal > CandVal) {
2546  TryCand.Reason = Reason;
2547  return true;
2548  }
2549  if (TryVal < CandVal) {
2550  if (Cand.Reason > Reason)
2551  Cand.Reason = Reason;
2552  return true;
2553  }
2554  return false;
2555 }
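// Editorial note (not part of the original source): tryLess/tryGreater return
// true whenever the comparison is decisive in either direction. When the
// existing candidate wins, Cand.Reason is lowered to the numerically smaller
// (higher-priority) reason so the trace reports the strongest reason the pick
// survived on; returning false means this heuristic tied and the next one runs.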
2556 
2557 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2558  GenericSchedulerBase::SchedCandidate &Cand,
2559  SchedBoundary &Zone) {
2560  if (Zone.isTop()) {
2561  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2562  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2563  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2564  return true;
2565  }
2566  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2567  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2568  return true;
2569  } else {
2570  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2571  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2572  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2573  return true;
2574  }
2575  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2576  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2577  return true;
2578  }
2579  return false;
2580 }
2581 
2582 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2583  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2584  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2585 }
2586 
2587 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2588  tracePick(Cand.Reason, Cand.AtTop);
2589 }
2590 
2591 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2592  assert(dag->hasVRegLiveness() &&
2593  "(PreRA)GenericScheduler needs vreg liveness");
2594  DAG = static_cast<ScheduleDAGMILive*>(dag);
2595  SchedModel = DAG->getSchedModel();
2596  TRI = DAG->TRI;
2597 
2598  Rem.init(DAG, SchedModel);
2599  Top.init(DAG, SchedModel, &Rem);
2600  Bot.init(DAG, SchedModel, &Rem);
2601 
2602  // Initialize resource counts.
2603 
2604  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2605  // are disabled, then these HazardRecs will be disabled.
2606  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2607  if (!Top.HazardRec) {
2608  Top.HazardRec =
2609  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2610  Itin, DAG);
2611  }
2612  if (!Bot.HazardRec) {
2613  Bot.HazardRec =
2614  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2615  Itin, DAG);
2616  }
2617  TopCand.SU = nullptr;
2618  BotCand.SU = nullptr;
2619 }
2620 
2621 /// Initialize the per-region scheduling policy.
2622 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2623  MachineBasicBlock::iterator End,
2624  unsigned NumRegionInstrs) {
2625  const MachineFunction &MF = *Begin->getParent()->getParent();
2626  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2627 
2628  // Avoid setting up the register pressure tracker for small regions to save
2629  // compile time. As a rough heuristic, only track pressure when the number of
2630  // schedulable instructions exceeds half the integer register file.
2631  RegionPolicy.ShouldTrackPressure = true;
2632  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2633  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2634  if (TLI->isTypeLegal(LegalIntVT)) {
2635  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2636  TLI->getRegClassFor(LegalIntVT));
2637  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2638  }
2639  }
2640 
2641  // For generic targets, we default to bottom-up, because it's simpler and more
2642  // compile-time optimizations have been implemented in that direction.
2643  RegionPolicy.OnlyBottomUp = true;
2644 
2645  // Allow the subtarget to override default policy.
2646  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2647 
2648  // After subtarget overrides, apply command line options.
2649  if (!EnableRegPressure)
2650  RegionPolicy.ShouldTrackPressure = false;
2651 
2652  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2653  // e.g. -misched-bottomup=false allows scheduling in both directions.
2655  "-misched-topdown incompatible with -misched-bottomup");
2656  if (ForceBottomUp.getNumOccurrences() > 0) {
2657  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2658  if (RegionPolicy.OnlyBottomUp)
2659  RegionPolicy.OnlyTopDown = false;
2660  }
2661  if (ForceTopDown.getNumOccurrences() > 0) {
2662  RegionPolicy.OnlyTopDown = ForceTopDown;
2663  if (RegionPolicy.OnlyTopDown)
2664  RegionPolicy.OnlyBottomUp = false;
2665  }
2666 }
2667 
2668 void GenericScheduler::dumpPolicy() {
2669  dbgs() << "GenericScheduler RegionPolicy: "
2670  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2671  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2672  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2673  << "\n";
2674 }
2675 
2676 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2677 /// critical path by more cycles than it takes to drain the instruction buffer.
2678 /// We estimate an upper bound on in-flight instructions as:
2679 ///
2680 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2681 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2682 /// InFlightResources = InFlightIterations * LoopResources
2683 ///
2684 /// TODO: Check execution resources in addition to IssueCount.
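// Editorial example (illustrative numbers, not from the original source): with
// an acyclic critical path of 12 cycles and a cyclic critical path (or loop
// resource height) of 4 cycles, roughly 12 / 4 = 3 iterations are in flight at
// once. If those iterations issue more micro-ops than MicroOpBufferSize can
// hold, the loop is acyclic-latency limited and the scheduler favors latency
// over register pressure.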
2685 void GenericScheduler::checkAcyclicLatency() {
2686  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2687  return;
2688 
2689  // Scaled number of cycles per loop iteration.
2690  unsigned IterCount =
2691  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2692  Rem.RemIssueCount);
2693  // Scaled acyclic critical path.
2694  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2695  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2696  unsigned InFlightCount =
2697  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2698  unsigned BufferLimit =
2699  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2700 
2701  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2702 
2703  DEBUG(dbgs() << "IssueCycles="
2705  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2706  << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2707  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2708  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2709  if (Rem.IsAcyclicLatencyLimited)
2710  dbgs() << " ACYCLIC LATENCY LIMIT\n");
2711 }
2712 
2713 void GenericScheduler::registerRoots() {
2714  Rem.CriticalPath = DAG->ExitSU.getDepth();
2715 
2716  // Some roots may not feed into ExitSU. Check all of them in case.
2717  for (std::vector<SUnit*>::const_iterator
2718  I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2719  if ((*I)->getDepth() > Rem.CriticalPath)
2720  Rem.CriticalPath = (*I)->getDepth();
2721  }
2722  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2723  if (DumpCriticalPathLength) {
2724  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2725  }
2726 
2727  if (EnableCyclicPath) {
2728  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2729  checkAcyclicLatency();
2730  }
2731 }
2732 
2733 static bool tryPressure(const PressureChange &TryP,
2734  const PressureChange &CandP,
2735  GenericSchedulerBase::SchedCandidate &TryCand,
2736  GenericSchedulerBase::SchedCandidate &Cand,
2737  GenericSchedulerBase::CandReason Reason,
2738  const TargetRegisterInfo *TRI,
2739  const MachineFunction &MF) {
2740  // If one candidate decreases and the other increases, go with it.
2741  // Invalid candidates have UnitInc==0.
2742  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2743  Reason)) {
2744  return true;
2745  }
2746  // Do not compare the magnitude of pressure changes between top and bottom
2747  // boundary.
2748  if (Cand.AtTop != TryCand.AtTop)
2749  return false;
2750 
2751  // If both candidates affect the same set in the same boundary, go with the
2752  // smallest increase.
2753  unsigned TryPSet = TryP.getPSetOrMax();
2754  unsigned CandPSet = CandP.getPSetOrMax();
2755  if (TryPSet == CandPSet) {
2756  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2757  Reason);
2758  }
2759 
2760  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2761  std::numeric_limits<int>::max();
2762 
2763  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2764  std::numeric_limits<int>::max();
2765 
2766  // If the candidates are decreasing pressure, reverse priority.
2767  if (TryP.getUnitInc() < 0)
2768  std::swap(TryRank, CandRank);
2769  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2770 }
2771 
2772 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2773  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2774 }
2775 
2776 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2777 /// their physreg def/use.
2778 ///
2779 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2780 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2781 /// with the operation that produces or consumes the physreg. We'll do this when
2782 /// regalloc has support for parallel copies.
2783 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2784  const MachineInstr *MI = SU->getInstr();
2785  if (!MI->isCopy())
2786  return 0;
2787 
2788  unsigned ScheduledOper = isTop ? 1 : 0;
2789  unsigned UnscheduledOper = isTop ? 0 : 1;
2790  // If we have already scheduled the physreg produce/consumer, immediately
2791  // schedule the copy.
2792  if (TargetRegisterInfo::isPhysicalRegister(
2793  MI->getOperand(ScheduledOper).getReg()))
2794  return 1;
2795  // If the physreg is at the boundary, defer it. Otherwise schedule it
2796  // immediately to free the dependent. We can hoist the copy later.
2797  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2798  if (TargetRegisterInfo::isPhysicalRegister(
2799  MI->getOperand(UnscheduledOper).getReg()))
2800  return AtBoundary ? -1 : 1;
2801  return 0;
2802 }
2803 
2804 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2805  bool AtTop,
2806  const RegPressureTracker &RPTracker,
2807  RegPressureTracker &TempTracker) {
2808  Cand.SU = SU;
2809  Cand.AtTop = AtTop;
2810  if (DAG->isTrackingPressure()) {
2811  if (AtTop) {
2812  TempTracker.getMaxDownwardPressureDelta(
2813  Cand.SU->getInstr(),
2814  Cand.RPDelta,
2815  DAG->getRegionCriticalPSets(),
2816  DAG->getRegPressure().MaxSetPressure);
2817  } else {
2818  if (VerifyScheduling) {
2819  TempTracker.getMaxUpwardPressureDelta(
2820  Cand.SU->getInstr(),
2821  &DAG->getPressureDiff(Cand.SU),
2822  Cand.RPDelta,
2823  DAG->getRegionCriticalPSets(),
2824  DAG->getRegPressure().MaxSetPressure);
2825  } else {
2826  RPTracker.getUpwardPressureDelta(
2827  Cand.SU->getInstr(),
2828  DAG->getPressureDiff(Cand.SU),
2829  Cand.RPDelta,
2830  DAG->getRegionCriticalPSets(),
2831  DAG->getRegPressure().MaxSetPressure);
2832  }
2833  }
2834  }
2835  DEBUG(if (Cand.RPDelta.Excess.isValid())
2836  dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
2838  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2839 }
2840 
2841 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2842 /// hierarchical. This may be more efficient than a graduated cost model because
2843 /// we don't need to evaluate all aspects of the model for each node in the
2844 /// queue. But it's really done to make the heuristics easier to debug and
2845 /// statistically analyze.
2846 ///
2847 /// \param Cand provides the policy and current best candidate.
2848 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2849 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2850 // if Cand is from a different zone than TryCand.
2851 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2852  SchedCandidate &TryCand,
2853  SchedBoundary *Zone) {
2854  // Initialize the candidate if needed.
2855  if (!Cand.isValid()) {
2856  TryCand.Reason = NodeOrder;
2857  return;
2858  }
2859 
2860  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2861  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2862  TryCand, Cand, PhysRegCopy))
2863  return;
2864 
2865  // Avoid exceeding the target's limit.
2866  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2867  Cand.RPDelta.Excess,
2868  TryCand, Cand, RegExcess, TRI,
2869  DAG->MF))
2870  return;
2871 
2872  // Avoid increasing the max critical pressure in the scheduled region.
2873  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2874  Cand.RPDelta.CriticalMax,
2875  TryCand, Cand, RegCritical, TRI,
2876  DAG->MF))
2877  return;
2878 
2879  // We only compare a subset of features when comparing nodes between
2880  // Top and Bottom boundary. Some properties are simply incomparable, in many
2881  // other instances we should only override the other boundary if something
2882  // is a clear good pick on one boundary. Skip heuristics that are more
2883  // "tie-breaking" in nature.
2884  bool SameBoundary = Zone != nullptr;
2885  if (SameBoundary) {
2886  // For loops that are acyclic path limited, aggressively schedule for
2887  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2888  // heuristics to take precedence.
2889  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2890  tryLatency(TryCand, Cand, *Zone))
2891  return;
2892 
2893  // Prioritize instructions that read unbuffered resources by stall cycles.
2894  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2895  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2896  return;
2897  }
2898 
2899  // Keep clustered nodes together to encourage downstream peephole
2900  // optimizations which may reduce resource requirements.
2901  //
2902  // This is a best effort to set things up for a post-RA pass. Optimizations
2903  // like generating loads of multiple registers should ideally be done within
2904  // the scheduler pass by combining the loads during DAG postprocessing.
2905  const SUnit *CandNextClusterSU =
2906  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2907  const SUnit *TryCandNextClusterSU =
2908  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2909  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2910  Cand.SU == CandNextClusterSU,
2911  TryCand, Cand, Cluster))
2912  return;
2913 
2914  if (SameBoundary) {
2915  // Weak edges are for clustering and other constraints.
2916  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2917  getWeakLeft(Cand.SU, Cand.AtTop),
2918  TryCand, Cand, Weak))
2919  return;
2920  }
2921 
2922  // Avoid increasing the max pressure of the entire region.
2923  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2924  Cand.RPDelta.CurrentMax,
2925  TryCand, Cand, RegMax, TRI,
2926  DAG->MF))
2927  return;
2928 
2929  if (SameBoundary) {
2930  // Avoid critical resource consumption and balance the schedule.
2931  TryCand.initResourceDelta(DAG, SchedModel);
2932  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2933  TryCand, Cand, ResourceReduce))
2934  return;
2935  if (tryGreater(TryCand.ResDelta.DemandedResources,
2936  Cand.ResDelta.DemandedResources,
2937  TryCand, Cand, ResourceDemand))
2938  return;
2939 
2940  // Avoid serializing long latency dependence chains.
2941  // For acyclic path limited loops, latency was already checked above.
2942  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2943  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2944  return;
2945 
2946  // Fall through to original instruction order.
2947  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2948  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2949  TryCand.Reason = NodeOrder;
2950  }
2951  }
2952 }
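// Editorial summary (not part of the original source): the heuristics above run
// in strict priority order -- physreg-copy bias, excess pressure, critical
// pressure, then (same boundary only) acyclic-latency and stall checks,
// clustering, weak edges, max pressure, resource reduction and demand, the
// general latency heuristic, and finally original node order as a tie-breaker.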
2953 
2954 /// Pick the best candidate from the queue.
2955 ///
2956 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2957 /// DAG building. To adjust for the current scheduling location we need to
2958 /// maintain the number of vreg uses remaining to be top-scheduled.
2959 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2960  const CandPolicy &ZonePolicy,
2961  const RegPressureTracker &RPTracker,
2962  SchedCandidate &Cand) {
2963  // getMaxPressureDelta temporarily modifies the tracker.
2964  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2965 
2966  ReadyQueue &Q = Zone.Available;
2967  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2968 
2969  SchedCandidate TryCand(ZonePolicy);
2970  initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
2971  // Pass SchedBoundary only when comparing nodes from the same boundary.
2972  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
2973  tryCandidate(Cand, TryCand, ZoneArg);
2974  if (TryCand.Reason != NoCand) {
2975  // Initialize resource delta if needed in case future heuristics query it.
2976  if (TryCand.ResDelta == SchedResourceDelta())
2977  TryCand.initResourceDelta(DAG, SchedModel);
2978  Cand.setBest(TryCand);
2979  DEBUG(traceCandidate(Cand));
2980  }
2981  }
2982 }
2983 
2984 /// Pick the best candidate node from either the top or bottom queue.
2985 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2986  // Schedule as far as possible in the direction of no choice. This is most
2987  // efficient, but also provides the best heuristics for CriticalPSets.
2988  if (SUnit *SU = Bot.pickOnlyChoice()) {
2989  IsTopNode = false;
2990  tracePick(Only1, false);
2991  return SU;
2992  }
2993  if (SUnit *SU = Top.pickOnlyChoice()) {
2994  IsTopNode = true;
2995  tracePick(Only1, true);
2996  return SU;
2997  }
2998  // Set the bottom-up policy based on the state of the current bottom zone and
2999  // the instructions outside the zone, including the top zone.
3000  CandPolicy BotPolicy;
3001  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3002  // Set the top-down policy based on the state of the current top zone and
3003  // the instructions outside the zone, including the bottom zone.
3004  CandPolicy TopPolicy;
3005  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3006 
3007  // See if BotCand is still valid (because we previously scheduled from Top).
3008  DEBUG(dbgs() << "Picking from Bot:\n");
3009  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3010  BotCand.Policy != BotPolicy) {
3011  BotCand.reset(CandPolicy());
3012  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3013  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3014  } else {
3015  DEBUG(traceCandidate(BotCand));
3016 #ifndef NDEBUG
3017  if (VerifyScheduling) {
3018  SchedCandidate TCand;
3019  TCand.reset(CandPolicy());
3020  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3021  assert(TCand.SU == BotCand.SU &&
3022  "Last pick result should correspond to re-picking right now");
3023  }
3024 #endif
3025  }
3026 
3027  // Check if the top Q has a better candidate.
3028  DEBUG(dbgs() << "Picking from Top:\n");
3029  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3030  TopCand.Policy != TopPolicy) {
3031  TopCand.reset(CandPolicy());
3032  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3033  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3034  } else {
3035  DEBUG(traceCandidate(TopCand));
3036 #ifndef NDEBUG
3037  if (VerifyScheduling) {
3038  SchedCandidate TCand;
3039  TCand.reset(CandPolicy());
3040  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3041  assert(TCand.SU == TopCand.SU &&
3042  "Last pick result should correspond to re-picking right now");
3043  }
3044 #endif
3045  }
3046 
3047  // Pick best from BotCand and TopCand.
3048  assert(BotCand.isValid());
3049  assert(TopCand.isValid());
3050  SchedCandidate Cand = BotCand;
3051  TopCand.Reason = NoCand;
3052  tryCandidate(Cand, TopCand, nullptr);
3053  if (TopCand.Reason != NoCand) {
3054  Cand.setBest(TopCand);
3055  DEBUG(traceCandidate(Cand));
3056  }
3057 
3058  IsTopNode = Cand.AtTop;
3059  tracePick(Cand);
3060  return Cand.SU;
3061 }
3062 
3063 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3064 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3065  if (DAG->top() == DAG->bottom()) {
3066  assert(Top.Available.empty() && Top.Pending.empty() &&
3067  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3068  return nullptr;
3069  }
3070  SUnit *SU;
3071  do {
3072  if (RegionPolicy.OnlyTopDown) {
3073  SU = Top.pickOnlyChoice();
3074  if (!SU) {
3075  CandPolicy NoPolicy;
3076  TopCand.reset(NoPolicy);
3077  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3078  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3079  tracePick(TopCand);
3080  SU = TopCand.SU;
3081  }
3082  IsTopNode = true;
3083  } else if (RegionPolicy.OnlyBottomUp) {
3084  SU = Bot.pickOnlyChoice();
3085  if (!SU) {
3086  CandPolicy NoPolicy;
3087  BotCand.reset(NoPolicy);
3088  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3089  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3090  tracePick(BotCand);
3091  SU = BotCand.SU;
3092  }
3093  IsTopNode = false;
3094  } else {
3095  SU = pickNodeBidirectional(IsTopNode);
3096  }
3097  } while (SU->isScheduled);
3098 
3099  if (SU->isTopReady())
3100  Top.removeReady(SU);
3101  if (SU->isBottomReady())
3102  Bot.removeReady(SU);
3103 
3104  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3105  return SU;
3106 }
3107 
3108 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3109 
3110  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3111  if (!isTop)
3112  ++InsertPos;
3113  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3114 
3115  // Find already scheduled copies with a single physreg dependence and move
3116  // them just above the scheduled instruction.
3117  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
3118  I != E; ++I) {
3119  if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
3120  continue;
3121  SUnit *DepSU = I->getSUnit();
3122  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3123  continue;
3124  MachineInstr *Copy = DepSU->getInstr();
3125  if (!Copy->isCopy())
3126  continue;
3127  DEBUG(dbgs() << " Rescheduling physreg copy ";
3128  I->getSUnit()->dump(DAG));
3129  DAG->moveInstruction(Copy, InsertPos);
3130  }
3131 }
3132 
3133 /// Update the scheduler's state after scheduling a node. This is the same node
3134 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3135 /// update its state based on the current cycle before MachineSchedStrategy
3136 /// does.
3137 ///
3138 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3139 /// them here. See comments in biasPhysRegCopy.
3140 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3141  if (IsTopNode) {
3142  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3143  Top.bumpNode(SU);
3144  if (SU->hasPhysRegUses)
3145  reschedulePhysRegCopies(SU, true);
3146  } else {
3147  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3148  Bot.bumpNode(SU);
3149  if (SU->hasPhysRegDefs)
3150  reschedulePhysRegCopies(SU, false);
3151  }
3152 }
3153 
3154 /// Create the standard converging machine scheduler. This will be used as the
3155 /// default scheduler if the target does not set a default.
3156 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
3157  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
3158  // Register DAG post-processors.
3159  //
3160  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3161  // data and pass it to later mutations. Have a single mutation that gathers
3162  // the interesting nodes in one pass.
3163  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3164  return DAG;
3165 }
3166 
3167 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3168  return createGenericSchedLive(C);
3169 }
3170 
3171 static MachineSchedRegistry
3172 GenericSchedRegistry("converge", "Standard converging scheduler.",
3173  createConveringSched);
3174 
3175 //===----------------------------------------------------------------------===//
3176 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3177 //===----------------------------------------------------------------------===//
3178 
3179 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3180  DAG = Dag;
3181  SchedModel = DAG->getSchedModel();
3182  TRI = DAG->TRI;
3183 
3184  Rem.init(DAG, SchedModel);
3185  Top.init(DAG, SchedModel, &Rem);
3186  BotRoots.clear();
3187 
3188  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3189  // or are disabled, then these HazardRecs will be disabled.
3190  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3191  if (!Top.HazardRec) {
3192  Top.HazardRec =
3193  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3194  Itin, DAG);
3195  }
3196 }
3197 
3198 
3199 void PostGenericScheduler::registerRoots() {
3200  Rem.CriticalPath = DAG->ExitSU.getDepth();
3201 
3202  // Some roots may not feed into ExitSU. Check all of them in case.
3203  for (SmallVectorImpl<SUnit*>::const_iterator
3204  I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3205  if ((*I)->getDepth() > Rem.CriticalPath)
3206  Rem.CriticalPath = (*I)->getDepth();
3207  }
3208  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3209  if (DumpCriticalPathLength) {
3210  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3211  }
3212 }
3213 
3214 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3215 ///
3216 /// \param Cand provides the policy and current best candidate.
3217 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3218 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3219  SchedCandidate &TryCand) {
3220 
3221  // Initialize the candidate if needed.
3222  if (!Cand.isValid()) {
3223  TryCand.Reason = NodeOrder;
3224  return;
3225  }
3226 
3227  // Prioritize instructions that read unbuffered resources by stall cycles.
3228  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3229  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3230  return;
3231 
3232  // Avoid critical resource consumption and balance the schedule.
3233  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3234  TryCand, Cand, ResourceReduce))
3235  return;
3236  if (tryGreater(TryCand.ResDelta.DemandedResources,
3237  Cand.ResDelta.DemandedResources,
3238  TryCand, Cand, ResourceDemand))
3239  return;
3240 
3241  // Avoid serializing long latency dependence chains.
3242  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3243  return;
3244  }
3245 
3246  // Fall through to original instruction order.
3247  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3248  TryCand.Reason = NodeOrder;
3249 }
3250 
3251 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3252  ReadyQueue &Q = Top.Available;
3253  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3254  SchedCandidate TryCand(Cand.Policy);
3255  TryCand.SU = *I;
3256  TryCand.AtTop = true;
3257  TryCand.initResourceDelta(DAG, SchedModel);
3258  tryCandidate(Cand, TryCand);
3259  if (TryCand.Reason != NoCand) {
3260  Cand.setBest(TryCand);
3261  DEBUG(traceCandidate(Cand));
3262  }
3263  }
3264 }
3265 
3266 /// Pick the next node to schedule.
3267 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3268  if (DAG->top() == DAG->bottom()) {
3269  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3270  return nullptr;
3271  }
3272  SUnit *SU;
3273  do {
3274  SU = Top.pickOnlyChoice();
3275  if (SU) {
3276  tracePick(Only1, true);
3277  } else {
3278  CandPolicy NoPolicy;
3279  SchedCandidate TopCand(NoPolicy);
3280  // Set the top-down policy based on the state of the current top zone and
3281  // the instructions outside the zone, including the bottom zone.
3282  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3283  pickNodeFromQueue(TopCand);
3284  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3285  tracePick(TopCand);
3286  SU = TopCand.SU;
3287  }
3288  } while (SU->isScheduled);
3289 
3290  IsTopNode = true;
3291  Top.removeReady(SU);
3292 
3293  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3294  return SU;
3295 }
3296 
3297 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3298 /// scheduled/remaining flags in the DAG nodes.
3299 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3300  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3301  Top.bumpNode(SU);
3302 }
3303 
3304 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3305  return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
3306  /*RemoveKillFlags=*/true);
3307 }
3308 
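createGenericSchedPostRA above is the default factory for the post-RA machine scheduler pass (which can be toggled with the -enable-post-misched option defined earlier in this file). As a hedged sketch of how a target could supply its own strategy instead, the usual route is to override TargetPassConfig::createPostMachineScheduler and assemble the DAG the same way; MyPassConfig and MyPostRAStrategy here are hypothetical names, not LLVM classes.

// Hypothetical sketch: MyPassConfig is assumed to derive from TargetPassConfig
// and MyPostRAStrategy to implement MachineSchedStrategy; neither exists in LLVM.
ScheduleDAGInstrs *
MyPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
  return new ScheduleDAGMI(C, make_unique<MyPostRAStrategy>(C),
                           /*RemoveKillFlags=*/true);
}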
3309 //===----------------------------------------------------------------------===//
3310 // ILP Scheduler. Currently for experimental analysis of heuristics.
3311 //===----------------------------------------------------------------------===//
3312 
3313 namespace {
3314 /// \brief Order nodes by the ILP metric.
3315 struct ILPOrder {
3316  const SchedDFSResult *DFSResult;
3317  const BitVector *ScheduledTrees;
3318  bool MaximizeILP;
3319 
3320  ILPOrder(bool MaxILP)
3321  : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
3322 
3323  /// \brief Apply a less-than relation on node priority.
3324  ///
3325  /// (Return true if A comes after B in the Q.)
3326  bool operator()(const SUnit *A, const SUnit *B) const {
3327  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3328  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3329  if (SchedTreeA != SchedTreeB) {
3330  // Unscheduled trees have lower priority.
3331  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3332  return ScheduledTrees->test(SchedTreeB);
3333 
3334  // Trees with shallower connections have lower priority.
3335  if (DFSResult->getSubtreeLevel(SchedTreeA)
3336  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3337  return DFSResult->getSubtreeLevel(SchedTreeA)
3338  < DFSResult->getSubtreeLevel(SchedTreeB);
3339  }
3340  }
3341  if (MaximizeILP)
3342  return DFSResult->getILP(A) < DFSResult->getILP(B);
3343  else
3344  return DFSResult->getILP(A) > DFSResult->getILP(B);
3345  }
3346 };
3347 
3348 /// \brief Schedule based on the ILP metric.
3349 class ILPScheduler : public MachineSchedStrategy {
3350  ScheduleDAGMILive *DAG;
3351  ILPOrder Cmp;
3352 
3353  std::vector<SUnit*> ReadyQ;
3354 public:
3355  ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
3356 
3357  void initialize(ScheduleDAGMI *dag) override {
3358  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3359  DAG = static_cast<ScheduleDAGMILive*>(dag);
3360  DAG->computeDFSResult();
3361  Cmp.DFSResult = DAG->getDFSResult();
3362  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3363  ReadyQ.clear();
3364  }
3365 
3366  void registerRoots() override {
3367  // Restore the heap in ReadyQ with the updated DFS results.
3368  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3369  }
3370 
3371  /// Implement MachineSchedStrategy interface.
3372  /// -----------------------------------------
3373 
3374  /// Callback to select the highest priority node from the ready Q.
3375  SUnit *pickNode(bool &IsTopNode) override {
3376  if (ReadyQ.empty()) return nullptr;
3377  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3378  SUnit *SU = ReadyQ.back();
3379  ReadyQ.pop_back();
3380  IsTopNode = false;
3381  DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3382  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3383  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3384  << DAG->getDFSResult()->getSubtreeLevel(
3385  DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3386  << "Scheduling " << *SU->getInstr());
3387  return SU;
3388  }
3389 
3390  /// \brief Scheduler callback to notify that a new subtree is scheduled.
3391  void scheduleTree(unsigned SubtreeID) override {
3392  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3393  }
3394 
3395  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3396  /// DFSResults, and resort the priority Q.
3397  void schedNode(SUnit *SU, bool IsTopNode) override {
3398  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3399  }
3400 
3401  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3402 
3403  void releaseBottomNode(SUnit *SU) override {
3404  ReadyQ.push_back(SU);
3405  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3406  }
3407 };
3408 } // namespace
3409 
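ILPScheduler above keeps ReadyQ as a binary heap (std::make_heap / push_heap / pop_heap) ordered by ILPOrder. Because the DFS metrics feeding the comparator change when roots are registered and whenever a subtree finishes, the heap is rebuilt wholesale with std::make_heap at those points rather than repaired element by element. A small stand-alone illustration of that rebuild-on-key-change pattern, using plain integers rather than SUnits:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> Prio = {3, 1, 4, 1, 5};
  auto Less = [](int A, int B) { return A < B; }; // max-heap on value

  std::make_heap(Prio.begin(), Prio.end(), Less);
  Prio[2] = 0;                                    // a key changed externally
  std::make_heap(Prio.begin(), Prio.end(), Less); // restore the heap invariant

  std::pop_heap(Prio.begin(), Prio.end(), Less);  // best element moves to back
  int Best = Prio.back();
  Prio.pop_back();
  return Best == 5 ? 0 : 1;                       // returns 0 on success
}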
3410 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3411  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
3412 }
3413 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3414  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
3415 }
3416 static MachineSchedRegistry ILPMaxRegistry(
3417  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3418 static MachineSchedRegistry ILPMinRegistry(
3419  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3420 
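With these registrations in place, the ILP strategies can be chosen through the -misched option handled by MachineSchedOpt earlier in this file (for example, llc -misched=ilpmax or -misched=ilpmin in a typical build). As the section comment above notes, they schedule bottom-up only and are intended for experimental analysis of heuristics rather than production use.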
3421 //===----------------------------------------------------------------------===//
3422 // Machine Instruction Shuffler for Correctness Testing
3423 //===----------------------------------------------------------------------===//
3424 
3425 #ifndef NDEBUG
3426 namespace {
3427 /// Apply a less-than relation on the node order, which corresponds to the
3428 /// instruction order prior to scheduling. IsReverse implements greater-than.
3429 template<bool IsReverse>
3430 struct SUnitOrder {
3431  bool operator()(SUnit *A, SUnit *B) const {
3432  if (IsReverse)
3433  return A->NodeNum > B->NodeNum;
3434  else
3435  return A->NodeNum < B->NodeNum;
3436  }
3437 };
3438 
3439 /// Reorder instructions as much as possible.
3440 class InstructionShuffler : public MachineSchedStrategy {
3441  bool IsAlternating;
3442  bool IsTopDown;
3443 
3444  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3445  // gives nodes with a higher number higher priority causing the latest
3446  // instructions to be scheduled first.
3447  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
3448  TopQ;
3449  // When scheduling bottom-up, use greater-than as the queue priority.
3450  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
3451  BottomQ;
3452 public:
3453  InstructionShuffler(bool alternate, bool topdown)
3454  : IsAlternating(alternate), IsTopDown(topdown) {}
3455 
3456  void initialize(ScheduleDAGMI*) override {
3457  TopQ.clear();
3458  BottomQ.clear();
3459  }
3460 
3461  /// Implement MachineSchedStrategy interface.
3462  /// -----------------------------------------
3463 
3464  SUnit *pickNode(bool &IsTopNode) override {
3465  SUnit *SU;
3466  if (IsTopDown) {
3467  do {
3468  if (TopQ.empty()) return nullptr;
3469  SU = TopQ.top();
3470  TopQ.pop();
3471  } while (SU->isScheduled);
3472  IsTopNode = true;
3473  } else {
3474  do {
3475  if (BottomQ.empty()) return nullptr;
3476  SU = BottomQ.top();
3477  BottomQ.pop();
3478  } while (SU->isScheduled);
3479  IsTopNode = false;
3480  }
3481  if (IsAlternating)
3482  IsTopDown = !IsTopDown;
3483  return SU;
3484  }
3485 
3486  void schedNode(SUnit *SU, bool IsTopNode) override {}
3487 
3488  void releaseTopNode(SUnit *SU) override {
3489  TopQ.push(SU);
3490  }
3491  void releaseBottomNode(SUnit *SU) override {
3492  BottomQ.push(SU);
3493  }
3494 };
3495 } // namespace
3496 
3497 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3498  bool Alternate = !ForceTopDown && !ForceBottomUp;
3499  bool TopDown = !ForceBottomUp;
3500  assert((TopDown || !ForceTopDown) &&
3501  "-misched-topdown incompatible with -misched-bottomup");
3502  return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
3503 }
3504 static MachineSchedRegistry ShufflerRegistry(
3505  "shuffle", "Shuffle machine instructions alternating directions",
3506  createInstructionShuffler);
3507 #endif // !NDEBUG
3508 
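The shuffler is compiled only into !NDEBUG builds; there it can be selected with -misched=shuffle, and the -misched-topdown / -misched-bottomup flags declared at the top of this file pin it to a single direction instead of alternating. Its purpose is stress-testing: if a shuffled schedule breaks a test, the scheduling DAG is most likely missing a dependence.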
3509 //===----------------------------------------------------------------------===//
3510 // GraphWriter support for ScheduleDAGMILive.
3511 //===----------------------------------------------------------------------===//
3512 
3513 #ifndef NDEBUG
3514 namespace llvm {
3515 
3516 template<> struct GraphTraits<
3517  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3518 
3519 template<>
3520 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3521
3522  DOTGraphTraits (bool isSimple=false): DefaultDOTGraphTraits(isSimple) {}
3523
3524  static std::string getGraphName(const ScheduleDAG *G) {
3525  return G->MF.getName();
3526  }
3527 
3528  static bool renderGraphFromBottomUp() {
3529  return true;
3530  }
3531 
3532  static bool isNodeHidden(const SUnit *Node) {
3533  if (ViewMISchedCutoff == 0)
3534  return false;
3535  return (Node->Preds.size() > ViewMISchedCutoff
3536  || Node->Succs.size() > ViewMISchedCutoff);
3537  }
3538 
3539  /// If you want to override the dot attributes printed for a particular
3540  /// edge, override this method.
3541  static std::string getEdgeAttributes(const SUnit *Node,
3542  SUnitIterator EI,
3543  const ScheduleDAG *Graph) {
3544  if (EI.isArtificialDep())
3545  return "color=cyan,style=dashed";
3546  if (EI.isCtrlDep())
3547  return "color=blue,style=dashed";
3548  return "";
3549  }
3550 
3551  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3552  std::string Str;
3553  raw_string_ostream SS(Str);
3554  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3555  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3556  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3557  SS << "SU:" << SU->NodeNum;
3558  if (DFS)
3559  SS << " I:" << DFS->getNumInstrs(SU);
3560  return SS.str();
3561  }
3562  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3563  return G->getGraphNodeLabel(SU);
3564  }
3565 
3566  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3567  std::string Str("shape=Mrecord");
3568  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3569  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3570  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3571  if (DFS) {
3572  Str += ",style=filled,fillcolor=\"#";
3573  Str += DOT::getColorString(DFS->getSubtreeID(N));
3574  Str += '"';
3575  }
3576  return Str;
3577  }
3578 };
3579 } // namespace llvm
3580 #endif // NDEBUG
3581 
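These DOT traits are what the -view-misched-dags option declared at the top of this file renders with; -view-misched-cutoff can be combined with it to hide nodes whose predecessor or successor count exceeds the cutoff, which keeps the graph readable for large regions.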
3582 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3583 /// rendered using 'dot'.
3584 ///
3585 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3586 #ifndef NDEBUG
3587  ViewGraph(this, Name, false, Title);
3588 #else
3589  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3590  << "systems with Graphviz or gv!\n";
3591 #endif // NDEBUG
3592 }
3593 
3594 /// Out-of-line implementation with no arguments is handy for gdb.
3595 void ScheduleDAGMI::viewGraph() {
3596  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3597 }
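As the comment above says, the argument-free overload exists mainly for interactive debugging: from a debugger stopped inside ScheduleDAGMI::schedule() (debug build, with Graphviz or gv installed), a gdb command such as "call this->viewGraph()" pops up the current region's DAG; the exact expression depends on what is in scope at the breakpoint.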