1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/MachineScheduler.h"
16 #include "llvm/ADT/PriorityQueue.h"
17 #include "llvm/Analysis/AliasAnalysis.h"
18 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
19 #include "llvm/CodeGen/MachineDominators.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/ScheduleDFS.h"
25 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/GraphWriter.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include <queue>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "misched"
37 
38 namespace llvm {
39 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
40  cl::desc("Force top-down list scheduling"));
41 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
42  cl::desc("Force bottom-up list scheduling"));
43 static cl::opt<bool>
44 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
45  cl::desc("Print critical path length to stdout"));
46 }
47 
48 #ifndef NDEBUG
49 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
50  cl::desc("Pop up a window to show MISched dags after they are processed"));
51 
52 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
53  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
54 
55 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
56  cl::desc("Only schedule this function"));
57 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
58  cl::desc("Only schedule this MBB#"));
59 #else
60 static bool ViewMISchedDAGs = false;
61 #endif // NDEBUG
62 
63 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
64  cl::desc("Enable register pressure scheduling."), cl::init(true));
65 
66 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
67  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
68 
69 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
70  cl::desc("Enable load clustering."), cl::init(true));
71 
72 // Experimental heuristics
73 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
74  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
75 
76 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
77  cl::desc("Verify machine instrs before and after machine scheduling"));
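// Illustrative usage (not part of the original file): the options above are
// ordinary cl::opt flags, so they can be passed to llc directly, or to clang
// via -mllvm. For example:
//   llc -misched-topdown foo.ll      (force top-down list scheduling)
//   llc -verify-misched foo.ll       (run the MI verifier around misched)
//   llc -misched-cutoff=20 foo.ll    (debug builds only: stop after 20 instrs)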
78 
79 // DAG subtrees must have at least this many nodes.
80 static const unsigned MinSubtreeSize = 8;
81 
82 // Pin the vtables to this file.
83 void MachineSchedStrategy::anchor() {}
84 void ScheduleDAGMutation::anchor() {}
85 
86 //===----------------------------------------------------------------------===//
87 // Machine Instruction Scheduling Pass and Registry
88 //===----------------------------------------------------------------------===//
89 
90 MachineSchedContext::MachineSchedContext():
91     MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
92   RegClassInfo = new RegisterClassInfo();
93 }
94 
95 MachineSchedContext::~MachineSchedContext() {
96   delete RegClassInfo;
97 }
98 
99 namespace {
100 /// Base class for a machine scheduler pass that can run at any point.
101 class MachineSchedulerBase : public MachineSchedContext,
102  public MachineFunctionPass {
103 public:
104  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
105 
106  void print(raw_ostream &O, const Module* = nullptr) const override;
107 
108 protected:
109  void scheduleRegions(ScheduleDAGInstrs &Scheduler);
110 };
111 
112 /// MachineScheduler runs after coalescing and before register allocation.
113 class MachineScheduler : public MachineSchedulerBase {
114 public:
115  MachineScheduler();
116 
117  void getAnalysisUsage(AnalysisUsage &AU) const override;
118 
119  bool runOnMachineFunction(MachineFunction&) override;
120 
121  static char ID; // Class identification, replacement for typeinfo
122 
123 protected:
124  ScheduleDAGInstrs *createMachineScheduler();
125 };
126 
127 /// PostMachineScheduler runs shortly before code emission.
128 class PostMachineScheduler : public MachineSchedulerBase {
129 public:
130  PostMachineScheduler();
131 
132  void getAnalysisUsage(AnalysisUsage &AU) const override;
133 
134  bool runOnMachineFunction(MachineFunction&) override;
135 
136  static char ID; // Class identification, replacement for typeinfo
137 
138 protected:
139  ScheduleDAGInstrs *createPostMachineScheduler();
140 };
141 } // namespace
142 
143 char MachineScheduler::ID = 0;
144 
145 char &llvm::MachineSchedulerID = MachineScheduler::ID;
146 
147 INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
148  "Machine Instruction Scheduler", false, false)
149 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
150 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
151 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
152 INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
153                     "Machine Instruction Scheduler", false, false)
154 
155 MachineScheduler::MachineScheduler()
156 : MachineSchedulerBase(ID) {
157   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
158 }
159 
160 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
161  AU.setPreservesCFG();
162   AU.addRequiredID(MachineDominatorsID);
163   AU.addRequired<MachineLoopInfo>();
164   AU.addRequired<AliasAnalysis>();
165   AU.addRequired<TargetPassConfig>();
166   AU.addRequired<SlotIndexes>();
167   AU.addPreserved<SlotIndexes>();
168   AU.addRequired<LiveIntervals>();
169   AU.addPreserved<LiveIntervals>();
170   MachineFunctionPass::getAnalysisUsage(AU);
171 }
172 
173 char PostMachineScheduler::ID = 0;
174 
175 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
176 
177 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
178  "PostRA Machine Instruction Scheduler", false, false)
179 
180 PostMachineScheduler::PostMachineScheduler()
181 : MachineSchedulerBase(ID) {
182   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
183 }
184 
185 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
186  AU.setPreservesCFG();
187   AU.addRequiredID(MachineDominatorsID);
188   AU.addRequired<MachineLoopInfo>();
189   AU.addRequired<TargetPassConfig>();
190   MachineFunctionPass::getAnalysisUsage(AU);
191 }
192 
193 MachinePassRegistry MachineSchedRegistry::Registry;
194 
195 /// A dummy default scheduler factory indicates whether the scheduler
196 /// is overridden on the command line.
197 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
198   return nullptr;
199 }
200 
201 /// MachineSchedOpt allows command line selection of the scheduler.
202 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
203                RegisterPassParser<MachineSchedRegistry> >
204 MachineSchedOpt("misched",
205                 cl::init(&useDefaultMachineSched), cl::Hidden,
206                 cl::desc("Machine instruction scheduler to use"));
207 
208 static MachineSchedRegistry
209 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
210                      useDefaultMachineSched);
211 
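// An illustrative sketch (not in the original source): out-of-tree strategies
// can hook into the registry above so they become selectable with
// -misched=<name>. The factory type and the ScheduleDAGMILive constructor
// match this release; createMyCustomSched and MySchedRegistry are made-up
// names.
//
//   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-custom", "Run my custom strategy.",
//                   createMyCustomSched);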
212 static cl::opt<bool> EnableMachineSched(
213     "enable-misched",
214  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
215  cl::Hidden);
216 
217 /// Forward declare the standard machine scheduler. This will be used as the
218 /// default scheduler if the target does not set a default.
219 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
220 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
221 
222 /// Decrement this iterator until reaching the top or a non-debug instr.
223 static MachineBasicBlock::const_iterator
224 priorNonDebug(MachineBasicBlock::const_iterator I,
225               MachineBasicBlock::const_iterator Beg) {
226   assert(I != Beg && "reached the top of the region, cannot decrement");
227  while (--I != Beg) {
228  if (!I->isDebugValue())
229  break;
230  }
231  return I;
232 }
233 
234 /// Non-const version.
235 static MachineBasicBlock::iterator
236 priorNonDebug(MachineBasicBlock::iterator I,
237               MachineBasicBlock::iterator Beg) {
238   return const_cast<MachineInstr*>(
239     &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
240 }
241 
242 /// If this iterator is a debug value, increment until reaching the End or a
243 /// non-debug instruction.
244 static MachineBasicBlock::const_iterator
245 nextIfDebug(MachineBasicBlock::const_iterator I,
246             MachineBasicBlock::const_iterator End) {
247   for(; I != End; ++I) {
248  if (!I->isDebugValue())
249  break;
250  }
251  return I;
252 }
253 
254 /// Non-const version.
255 static MachineBasicBlock::iterator
256 nextIfDebug(MachineBasicBlock::iterator I,
257             MachineBasicBlock::iterator End) {
258   // Cast the return value to nonconst MachineInstr, then cast to an
259   // instr_iterator, which does not check for null, finally return a
260   // bundle_iterator.
261   return MachineBasicBlock::instr_iterator(
262     const_cast<MachineInstr*>(
263       &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
264 }
265 
266 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
267 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
268  // Select the scheduler, or set the default.
269   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
270   if (Ctor != useDefaultMachineSched)
271  return Ctor(this);
272 
273  // Get the default scheduler set by the target for this function.
274  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
275  if (Scheduler)
276  return Scheduler;
277 
278  // Default to GenericScheduler.
279  return createGenericSchedLive(this);
280 }
281 
282 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
283 /// the caller. We don't have a command line option to override the postRA
284 /// scheduler. The Target must configure it.
285 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
286  // Get the postRA scheduler set by the target for this function.
287  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
288  if (Scheduler)
289  return Scheduler;
290 
291  // Default to GenericScheduler.
292  return createGenericSchedPostRA(this);
293 }
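// A hedged sketch of the target-side hook consulted above (it lives in a
// backend's TargetPassConfig subclass, not in this file; MyPassConfig and
// MyDAGMutation are placeholder names). Returning null falls through to the
// generic schedulers.
//
//   ScheduleDAGInstrs *
//   MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG =
//         new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
//     DAG->addMutation(make_unique<MyDAGMutation>()); // hypothetical
//     return DAG;
//   }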
294 
295 /// Top-level MachineScheduler pass driver.
296 ///
297 /// Visit blocks in function order. Divide each block into scheduling regions
298 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
299 /// consistent with the DAG builder, which traverses the interior of the
300 /// scheduling regions bottom-up.
301 ///
302 /// This design avoids exposing scheduling boundaries to the DAG builder,
303 /// simplifying the DAG builder's support for "special" target instructions.
304 /// At the same time the design allows target schedulers to operate across
305 /// scheduling boundaries, for example to bundle the boundary instructions
306 /// without reordering them. This creates complexity, because the target
307 /// scheduler must update the RegionBegin and RegionEnd positions cached by
308 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
309 /// design would be to split blocks at scheduling boundaries, but LLVM has a
310 /// general bias against block splitting purely for implementation simplicity.
311 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
312  if (EnableMachineSched.getNumOccurrences()) {
313  if (!EnableMachineSched)
314  return false;
315  } else if (!mf.getSubtarget().enableMachineScheduler())
316  return false;
317 
318   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
319 
320  // Initialize the context of the pass.
321  MF = &mf;
322  MLI = &getAnalysis<MachineLoopInfo>();
323  MDT = &getAnalysis<MachineDominatorTree>();
324  PassConfig = &getAnalysis<TargetPassConfig>();
325  AA = &getAnalysis<AliasAnalysis>();
326 
327  LIS = &getAnalysis<LiveIntervals>();
328 
329  if (VerifyScheduling) {
330  DEBUG(LIS->dump());
331  MF->verify(this, "Before machine scheduling.");
332  }
333  RegClassInfo->runOnMachineFunction(*MF);
334 
335  // Instantiate the selected scheduler for this target, function, and
336  // optimization level.
337  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
338  scheduleRegions(*Scheduler);
339 
340  DEBUG(LIS->dump());
341  if (VerifyScheduling)
342  MF->verify(this, "After machine scheduling.");
343  return true;
344 }
345 
346 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
347  if (skipOptnoneFunction(*mf.getFunction()))
348  return false;
349 
350  if (!mf.getSubtarget().enablePostRAScheduler()) {
351  DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
352  return false;
353  }
354  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
355 
356  // Initialize the context of the pass.
357  MF = &mf;
358  PassConfig = &getAnalysis<TargetPassConfig>();
359 
360  if (VerifyScheduling)
361  MF->verify(this, "Before post machine scheduling.");
362 
363  // Instantiate the selected scheduler for this target, function, and
364  // optimization level.
365  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
366  scheduleRegions(*Scheduler);
367 
368  if (VerifyScheduling)
369  MF->verify(this, "After post machine scheduling.");
370  return true;
371 }
372 
373 /// Return true if the given instruction should not be included in a scheduling
374 /// region.
375 ///
376 /// MachineScheduler does not currently support scheduling across calls. To
377 /// handle calls, the DAG builder needs to be modified to create register
378 /// anti/output dependencies on the registers clobbered by the call's regmask
379 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
380 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
381 /// the boundary, but there would be no benefit to postRA scheduling across
382 /// calls this late anyway.
383 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
384                             MachineBasicBlock *MBB,
385  MachineFunction *MF,
386  const TargetInstrInfo *TII,
387  bool IsPostRA) {
388  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
389 }
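// For example (illustrative): in a block "I0; I1; call @f; I2; I3" the call is
// a boundary, so the driver below forms the regions [I2,I3] and [I0,I1] and
// visits them bottom-up; the call itself is never reordered.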
390 
391 /// Main driver for both MachineScheduler and PostMachineScheduler.
392 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
393  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
394  bool IsPostRA = Scheduler.isPostRA();
395 
396  // Visit all machine basic blocks.
397  //
398  // TODO: Visit blocks in global postorder or postorder within the bottom-up
399  // loop tree. Then we can optionally compute global RegPressure.
400  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
401  MBB != MBBEnd; ++MBB) {
402 
403  Scheduler.startBlock(MBB);
404 
405 #ifndef NDEBUG
406  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
407  continue;
408  if (SchedOnlyBlock.getNumOccurrences()
409  && (int)SchedOnlyBlock != MBB->getNumber())
410  continue;
411 #endif
412 
413  // Break the block into scheduling regions [I, RegionEnd), and schedule each
414   // region as soon as it is discovered. RegionEnd points to the scheduling
415  // boundary at the bottom of the region. The DAG does not include RegionEnd,
416  // but the region does (i.e. the next RegionEnd is above the previous
417  // RegionBegin). If the current block has no terminator then RegionEnd ==
418  // MBB->end() for the bottom region.
419  //
420  // The Scheduler may insert instructions during either schedule() or
421  // exitRegion(), even for empty regions. So the local iterators 'I' and
422  // 'RegionEnd' are invalid across these calls.
423  //
424  // MBB::size() uses instr_iterator to count. Here we need a bundle to count
425  // as a single instruction.
426  unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
427  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
428  RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
429 
430  // Avoid decrementing RegionEnd for blocks with no terminator.
431  if (RegionEnd != MBB->end() ||
432  isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
433  --RegionEnd;
434  // Count the boundary instruction.
435  --RemainingInstrs;
436  }
437 
438  // The next region starts above the previous region. Look backward in the
439  // instruction stream until we find the nearest boundary.
440  unsigned NumRegionInstrs = 0;
441  MachineBasicBlock::iterator I = RegionEnd;
442  for(;I != MBB->begin(); --I, --RemainingInstrs) {
443  if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
444  break;
445  if (!I->isDebugValue())
446  ++NumRegionInstrs;
447  }
448  // Notify the scheduler of the region, even if we may skip scheduling
449  // it. Perhaps it still needs to be bundled.
450  Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
451 
452  // Skip empty scheduling regions (0 or 1 schedulable instructions).
453  if (I == RegionEnd || I == std::prev(RegionEnd)) {
454  // Close the current region. Bundle the terminator if needed.
455  // This invalidates 'RegionEnd' and 'I'.
456  Scheduler.exitRegion();
457  continue;
458  }
459  DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
460  << "MI Scheduling **********\n");
461  DEBUG(dbgs() << MF->getName()
462  << ":BB#" << MBB->getNumber() << " " << MBB->getName()
463  << "\n From: " << *I << " To: ";
464  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
465  else dbgs() << "End";
466  dbgs() << " RegionInstrs: " << NumRegionInstrs
467  << " Remaining: " << RemainingInstrs << "\n");
468   if (DumpCriticalPathLength) {
469     errs() << MF->getName();
470  errs() << ":BB# " << MBB->getNumber();
471  errs() << " " << MBB->getName() << " \n";
472  }
473 
474  // Schedule a region: possibly reorder instructions.
475  // This invalidates 'RegionEnd' and 'I'.
476  Scheduler.schedule();
477 
478  // Close the current region.
479  Scheduler.exitRegion();
480 
481  // Scheduling has invalidated the current iterator 'I'. Ask the
482   // scheduler for the top of its scheduled region.
483  RegionEnd = Scheduler.begin();
484  }
485  assert(RemainingInstrs == 0 && "Instruction count mismatch!");
486  Scheduler.finishBlock();
487  if (Scheduler.isPostRA()) {
488  // FIXME: Ideally, no further passes should rely on kill flags. However,
489  // thumb2 size reduction is currently an exception.
490  Scheduler.fixupKills(MBB);
491  }
492  }
493  Scheduler.finalizeSchedule();
494 }
495 
496 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
497  // unimplemented
498 }
499 
500 LLVM_DUMP_METHOD
501 void ReadyQueue::dump() {
502   dbgs() << Name << ": ";
503  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
504  dbgs() << Queue[i]->NodeNum << " ";
505  dbgs() << "\n";
506 }
507 
508 //===----------------------------------------------------------------------===//
509 // ScheduleDAGMI - Basic machine instruction scheduling. This is
510 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
511 // virtual registers.
512 //===----------------------------------------------------------------------===//
513 
514 // Provide a vtable anchor.
515 ScheduleDAGMI::~ScheduleDAGMI() {
516 }
517 
518 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
519  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
520 }
521 
522 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
523  if (SuccSU != &ExitSU) {
524  // Do not use WillCreateCycle, it assumes SD scheduling.
525  // If Pred is reachable from Succ, then the edge creates a cycle.
526  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
527  return false;
528  Topo.AddPred(SuccSU, PredDep.getSUnit());
529  }
530  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
531  // Return true regardless of whether a new edge needed to be inserted.
532  return true;
533 }
534 
535 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
536 /// NumPredsLeft reaches zero, release the successor node.
537 ///
538 /// FIXME: Adjust SuccSU height based on MinLatency.
539 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
540  SUnit *SuccSU = SuccEdge->getSUnit();
541 
542  if (SuccEdge->isWeak()) {
543  --SuccSU->WeakPredsLeft;
544  if (SuccEdge->isCluster())
545  NextClusterSucc = SuccSU;
546  return;
547  }
548 #ifndef NDEBUG
549  if (SuccSU->NumPredsLeft == 0) {
550  dbgs() << "*** Scheduling failed! ***\n";
551  SuccSU->dump(this);
552  dbgs() << " has been released too many times!\n";
553  llvm_unreachable(nullptr);
554  }
555 #endif
556  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
557  // CurrCycle may have advanced since then.
558  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
559  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
560 
561  --SuccSU->NumPredsLeft;
562  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
563  SchedImpl->releaseTopNode(SuccSU);
564 }
565 
566 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
567 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
568   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
569  I != E; ++I) {
570  releaseSucc(SU, &*I);
571  }
572 }
573 
574 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
575 /// NumSuccsLeft reaches zero, release the predecessor node.
576 ///
577 /// FIXME: Adjust PredSU height based on MinLatency.
578 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
579  SUnit *PredSU = PredEdge->getSUnit();
580 
581  if (PredEdge->isWeak()) {
582  --PredSU->WeakSuccsLeft;
583  if (PredEdge->isCluster())
584  NextClusterPred = PredSU;
585  return;
586  }
587 #ifndef NDEBUG
588  if (PredSU->NumSuccsLeft == 0) {
589  dbgs() << "*** Scheduling failed! ***\n";
590  PredSU->dump(this);
591  dbgs() << " has been released too many times!\n";
592  llvm_unreachable(nullptr);
593  }
594 #endif
595  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
596  // CurrCycle may have advanced since then.
597  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
598  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
599 
600  --PredSU->NumSuccsLeft;
601  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
602  SchedImpl->releaseBottomNode(PredSU);
603 }
604 
605 /// releasePredecessors - Call releasePred on each of SU's predecessors.
606 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
607   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
608  I != E; ++I) {
609  releasePred(SU, &*I);
610  }
611 }
612 
613 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
614 /// crossing a scheduling boundary. [begin, end) includes all instructions in
615 /// the region, including the boundary itself and single-instruction regions
616 /// that don't get scheduled.
617 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
618                                 MachineBasicBlock::iterator begin,
619                                 MachineBasicBlock::iterator end,
620                                 unsigned regioninstrs)
621 {
622  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
623 
624  SchedImpl->initPolicy(begin, end, regioninstrs);
625 }
626 
627 /// This is normally called from the main scheduler loop but may also be invoked
628 /// by the scheduling strategy to perform additional code motion.
629 void ScheduleDAGMI::moveInstruction(
630   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
631   // Advance RegionBegin if the first instruction moves down.
632  if (&*RegionBegin == MI)
633  ++RegionBegin;
634 
635  // Update the instruction stream.
636  BB->splice(InsertPos, BB, MI);
637 
638  // Update LiveIntervals
639  if (LIS)
640  LIS->handleMove(MI, /*UpdateFlags=*/true);
641 
642  // Recede RegionBegin if an instruction moves above the first.
643  if (RegionBegin == InsertPos)
644  RegionBegin = MI;
645 }
646 
647 bool ScheduleDAGMI::checkSchedLimit() {
648 #ifndef NDEBUG
649   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
650     CurrentTop = CurrentBottom;
651     return false;
652   }
653   ++NumInstrsScheduled;
654 #endif
655  return true;
656 }
657 
658 /// Per-region scheduling driver, called back from
659 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
660 /// does not consider liveness or register pressure. It is useful for PostRA
661 /// scheduling and potentially other custom schedulers.
662 void ScheduleDAGMI::schedule() {
663   // Build the DAG.
664  buildSchedGraph(AA);
665 
666   Topo.InitDAGTopologicalSorting();
667 
668  postprocessDAG();
669 
670  SmallVector<SUnit*, 8> TopRoots, BotRoots;
671  findRootsAndBiasEdges(TopRoots, BotRoots);
672 
673  // Initialize the strategy before modifying the DAG.
674  // This may initialize a DFSResult to be used for queue priority.
675  SchedImpl->initialize(this);
676 
677  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
678  SUnits[su].dumpAll(this));
679  if (ViewMISchedDAGs) viewGraph();
680 
681  // Initialize ready queues now that the DAG and priority data are finalized.
682  initQueues(TopRoots, BotRoots);
683 
684  bool IsTopNode = false;
685  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
686  assert(!SU->isScheduled && "Node already scheduled");
687  if (!checkSchedLimit())
688  break;
689 
690  MachineInstr *MI = SU->getInstr();
691  if (IsTopNode) {
692  assert(SU->isTopReady() && "node still has unscheduled dependencies");
693  if (&*CurrentTop == MI)
694       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
695     else
696       moveInstruction(MI, CurrentTop);
697   }
698  else {
699  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
700     MachineBasicBlock::iterator priorII =
701       priorNonDebug(CurrentBottom, CurrentTop);
702     if (&*priorII == MI)
703  CurrentBottom = priorII;
704  else {
705  if (&*CurrentTop == MI)
706  CurrentTop = nextIfDebug(++CurrentTop, priorII);
707       moveInstruction(MI, CurrentBottom);
708       CurrentBottom = MI;
709  }
710  }
711  // Notify the scheduling strategy before updating the DAG.
712  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
713  // runs, it can then use the accurate ReadyCycle time to determine whether
714  // newly released nodes can move to the readyQ.
715  SchedImpl->schedNode(SU, IsTopNode);
716 
717  updateQueues(SU, IsTopNode);
718  }
719  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
720 
721   placeDebugValues();
722 
723  DEBUG({
724  unsigned BBNum = begin()->getParent()->getNumber();
725  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
726  dumpSchedule();
727  dbgs() << '\n';
728  });
729 }
730 
731 /// Apply each ScheduleDAGMutation step in order.
732 void ScheduleDAGMI::postprocessDAG() {
733   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
734  Mutations[i]->apply(this);
735  }
736 }
737 
738 void ScheduleDAGMI::
739 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
740                       SmallVectorImpl<SUnit*> &BotRoots) {
741  for (std::vector<SUnit>::iterator
742  I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
743  SUnit *SU = &(*I);
744  assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
745 
746  // Order predecessors so DFSResult follows the critical path.
747  SU->biasCriticalPath();
748 
749  // A SUnit is ready to top schedule if it has no predecessors.
750  if (!I->NumPredsLeft)
751  TopRoots.push_back(SU);
752  // A SUnit is ready to bottom schedule if it has no successors.
753  if (!I->NumSuccsLeft)
754  BotRoots.push_back(SU);
755  }
756   ExitSU.biasCriticalPath();
757 }
758 
759 /// Identify DAG roots and setup scheduler queues.
760 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
761                                ArrayRef<SUnit*> BotRoots) {
762  NextClusterSucc = nullptr;
763  NextClusterPred = nullptr;
764 
765  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
766  //
767  // Nodes with unreleased weak edges can still be roots.
768  // Release top roots in forward order.
769   for (SmallVectorImpl<SUnit*>::const_iterator
770          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
771  SchedImpl->releaseTopNode(*I);
772  }
773  // Release bottom roots in reverse order so the higher priority nodes appear
774  // first. This is more natural and slightly more efficient.
775   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
776          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
777  SchedImpl->releaseBottomNode(*I);
778  }
779 
780   releaseSuccessors(&EntrySU);
781   releasePredecessors(&ExitSU);
782 
783  SchedImpl->registerRoots();
784 
785  // Advance past initial DebugValues.
786   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
787   CurrentBottom = RegionEnd;
788 }
789 
790 /// Update scheduler queues after scheduling an instruction.
791 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
792  // Release dependent instructions for scheduling.
793  if (IsTopNode)
794  releaseSuccessors(SU);
795  else
796     releasePredecessors(SU);
797 
798  SU->isScheduled = true;
799 }
800 
801 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
802 void ScheduleDAGMI::placeDebugValues() {
803   // If first instruction was a DBG_VALUE then put it back.
804  if (FirstDbgValue) {
805     BB->splice(RegionBegin, BB, FirstDbgValue);
806     RegionBegin = FirstDbgValue;
807   }
808 
809  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
810  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
811  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
812  MachineInstr *DbgValue = P.first;
813  MachineBasicBlock::iterator OrigPrevMI = P.second;
814  if (&*RegionBegin == DbgValue)
815  ++RegionBegin;
816  BB->splice(++OrigPrevMI, BB, DbgValue);
817  if (OrigPrevMI == std::prev(RegionEnd))
818  RegionEnd = DbgValue;
819  }
820  DbgValues.clear();
821  FirstDbgValue = nullptr;
822 }
823 
824 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
825 void ScheduleDAGMI::dumpSchedule() const {
826   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
827  if (SUnit *SU = getSUnit(&(*MI)))
828  SU->dump(this);
829  else
830  dbgs() << "Missing SUnit\n";
831  }
832 }
833 #endif
834 
835 //===----------------------------------------------------------------------===//
836 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
837 // preservation.
838 //===----------------------------------------------------------------------===//
839 
840 ScheduleDAGMILive::~ScheduleDAGMILive() {
841   delete DFSResult;
842 }
843 
844 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
845 /// crossing a scheduling boundary. [begin, end) includes all instructions in
846 /// the region, including the boundary itself and single-instruction regions
847 /// that don't get scheduled.
848 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
849                                     MachineBasicBlock::iterator begin,
850                                     MachineBasicBlock::iterator end,
851                                     unsigned regioninstrs)
852 {
853  // ScheduleDAGMI initializes SchedImpl's per-region policy.
854  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
855 
856  // For convenience remember the end of the liveness region.
857  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
858 
859   SUPressureDiffs.clear();
860 
861  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
862 }
863 
864 // Set up the register pressure trackers for the top-scheduled and
865 // bottom-scheduled regions.
866 void ScheduleDAGMILive::initRegPressure() {
867   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
868   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
869 
870  // Close the RPTracker to finalize live ins.
871   RPTracker.closeRegion();
872 
873  DEBUG(RPTracker.dump());
874 
875  // Initialize the live ins and live outs.
876   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
877   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
878 
879  // Close one end of the tracker so we can call
880  // getMaxUpward/DownwardPressureDelta before advancing across any
881  // instructions. This converts currently live regs into live ins/outs.
882   TopRPTracker.closeTop();
883   BotRPTracker.closeBottom();
884 
885   BotRPTracker.initLiveThru(RPTracker);
886   if (!BotRPTracker.getLiveThru().empty()) {
887     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
888     DEBUG(dbgs() << "Live Thru: ";
889           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
890   };
891 
892  // For each live out vreg reduce the pressure change associated with other
893  // uses of the same vreg below the live-out reaching def.
894   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
895 
896  // Account for liveness generated by the region boundary.
897  if (LiveRegionEnd != RegionEnd) {
898  SmallVector<unsigned, 8> LiveUses;
899  BotRPTracker.recede(&LiveUses);
900  updatePressureDiffs(LiveUses);
901  }
902 
903  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
904 
905  // Cache the list of excess pressure sets in this region. This will also track
906  // the max pressure in the scheduled code for these sets.
907  RegionCriticalPSets.clear();
908  const std::vector<unsigned> &RegionPressure =
909     RPTracker.getPressure().MaxSetPressure;
910   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
911  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
912  if (RegionPressure[i] > Limit) {
913       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
914             << " Limit " << Limit
915  << " Actual " << RegionPressure[i] << "\n");
916  RegionCriticalPSets.push_back(PressureChange(i));
917  }
918  }
919  DEBUG(dbgs() << "Excess PSets: ";
920  for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
921           dbgs() << TRI->getRegPressureSetName(
922             RegionCriticalPSets[i].getPSet()) << " ";
923  dbgs() << "\n");
924 }
925 
926 void ScheduleDAGMILive::
927 updateScheduledPressure(const SUnit *SU,
928                         const std::vector<unsigned> &NewMaxPressure) {
929  const PressureDiff &PDiff = getPressureDiff(SU);
930  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
931  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
932  I != E; ++I) {
933  if (!I->isValid())
934  break;
935  unsigned ID = I->getPSet();
936  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
937  ++CritIdx;
938  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
939  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
940  && NewMaxPressure[ID] <= INT16_MAX)
941  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
942  }
943  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
944  if (NewMaxPressure[ID] >= Limit - 2) {
945  DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
946  << NewMaxPressure[ID]
947  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
948  << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
949  }
950  }
951 }
952 
953 /// Update the PressureDiff array for liveness after scheduling this
954 /// instruction.
955 void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
956   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
957  /// FIXME: Currently assuming single-use physregs.
958  unsigned Reg = LiveUses[LUIdx];
959  DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
960  if (!TRI->isVirtualRegister(Reg))
961  continue;
962 
963  // This may be called before CurrentBottom has been initialized. However,
964  // BotRPTracker must have a valid position. We want the value live into the
965  // instruction or live out of the block, so ask for the previous
966  // instruction's live-out.
967  const LiveInterval &LI = LIS->getInterval(Reg);
968  VNInfo *VNI;
969     MachineBasicBlock::const_iterator I =
970       nextIfDebug(BotRPTracker.getPos(), BB->end());
971     if (I == BB->end())
972  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
973  else {
974  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
975  VNI = LRQ.valueIn();
976  }
977  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
978  assert(VNI && "No live value at use.");
979     for (VReg2UseMap::iterator
980            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
981  SUnit *SU = UI->SU;
982  DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
983  << *SU->getInstr());
984  // If this use comes before the reaching def, it cannot be a last use, so
985     // decrease its pressure change.
986  if (!SU->isScheduled && SU != &ExitSU) {
987  LiveQueryResult LRQ
988  = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
989  if (LRQ.valueIn() == VNI)
990  getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
991  }
992  }
993  }
994 }
995 
996 /// schedule - Called back from MachineScheduler::runOnMachineFunction
997 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
998 /// only includes instructions that have DAG nodes, not scheduling boundaries.
999 ///
1000 /// This is a skeletal driver, with all the functionality pushed into helpers,
1001 /// so that it can be easily extended by experimental schedulers. Generally,
1002 /// implementing MachineSchedStrategy should be sufficient to implement a new
1003 /// scheduling algorithm. However, if a scheduler further subclasses
1004 /// ScheduleDAGMILive then it will want to override this virtual method in order
1005 /// to update any specialized state.
1006 void ScheduleDAGMILive::schedule() {
1007   buildDAGWithRegPressure();
1008 
1009   Topo.InitDAGTopologicalSorting();
1010 
1011  postprocessDAG();
1012 
1013  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1014  findRootsAndBiasEdges(TopRoots, BotRoots);
1015 
1016  // Initialize the strategy before modifying the DAG.
1017  // This may initialize a DFSResult to be used for queue priority.
1018  SchedImpl->initialize(this);
1019 
1020  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
1021  SUnits[su].dumpAll(this));
1022  if (ViewMISchedDAGs) viewGraph();
1023 
1024  // Initialize ready queues now that the DAG and priority data are finalized.
1025  initQueues(TopRoots, BotRoots);
1026 
1027  if (ShouldTrackPressure) {
1028  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1029     TopRPTracker.setPos(CurrentTop);
1030   }
1031 
1032  bool IsTopNode = false;
1033  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
1034  assert(!SU->isScheduled && "Node already scheduled");
1035  if (!checkSchedLimit())
1036  break;
1037 
1038  scheduleMI(SU, IsTopNode);
1039 
1040  if (DFSResult) {
1041  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1042  if (!ScheduledTrees.test(SubtreeID)) {
1043  ScheduledTrees.set(SubtreeID);
1044  DFSResult->scheduleTree(SubtreeID);
1045  SchedImpl->scheduleTree(SubtreeID);
1046  }
1047  }
1048 
1049  // Notify the scheduling strategy after updating the DAG.
1050  SchedImpl->schedNode(SU, IsTopNode);
1051 
1052  updateQueues(SU, IsTopNode);
1053  }
1054  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1055 
1056  placeDebugValues();
1057 
1058  DEBUG({
1059  unsigned BBNum = begin()->getParent()->getNumber();
1060  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1061  dumpSchedule();
1062  dbgs() << '\n';
1063  });
1064 }
1065 
1066 /// Build the DAG and setup three register pressure trackers.
1067 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1068   if (!ShouldTrackPressure) {
1069  RPTracker.reset();
1070  RegionCriticalPSets.clear();
1071  buildSchedGraph(AA);
1072  return;
1073  }
1074 
1075  // Initialize the register pressure tracker used by buildSchedGraph.
1076   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1077                  /*TrackUntiedDefs=*/true);
1078 
1079   // Account for liveness generated by the region boundary.
1080  if (LiveRegionEnd != RegionEnd)
1081  RPTracker.recede();
1082 
1083  // Build the DAG, and compute current register pressure.
1084   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
1085 
1086  // Initialize top/bottom trackers after computing region pressure.
1087  initRegPressure();
1088 }
1089 
1090 void ScheduleDAGMILive::computeDFSResult() {
1091   if (!DFSResult)
1092  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1093  DFSResult->clear();
1094   ScheduledTrees.clear();
1095   DFSResult->resize(SUnits.size());
1096   DFSResult->compute(SUnits);
1097   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1098 }
1099 
1100 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1101 /// only provides the critical path for single block loops. To handle loops that
1102 /// span blocks, we could use the vreg path latencies provided by
1103 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1104 /// available for use in the scheduler.
1105 ///
1106 /// The cyclic path estimation identifies a def-use pair that crosses the back
1107 /// edge and considers the depth and height of the nodes. For example, consider
1108 /// the following instruction sequence where each instruction has unit latency
1109 /// and defines an eponymous virtual register:
1110 ///
1111 /// a->b(a,c)->c(b)->d(c)->exit
1112 ///
1113 /// The cyclic critical path is two cycles: b->c->b
1114 /// The acyclic critical path is four cycles: a->b->c->d->exit
1115 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1116 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1117 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1118 /// LiveInDepth = depth(b) = len(a->b) = 1
1119 ///
1120 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1121 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1122 /// CyclicCriticalPath = min(2, 2) = 2
1123 ///
1124 /// This could be relevant to PostRA scheduling, but is currently implemented
1125 /// assuming LiveIntervals.
1126 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1127   // This only applies to single-block loops.
1128  if (!BB->isSuccessor(BB))
1129  return 0;
1130 
1131  unsigned MaxCyclicLatency = 0;
1132  // Visit each live out vreg def to find def/use pairs that cross iterations.
1133   ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
1134   for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
1135  RI != RE; ++RI) {
1136  unsigned Reg = *RI;
1137  if (!TRI->isVirtualRegister(Reg))
1138  continue;
1139  const LiveInterval &LI = LIS->getInterval(Reg);
1140  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1141  if (!DefVNI)
1142  continue;
1143 
1144  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1145  const SUnit *DefSU = getSUnit(DefMI);
1146  if (!DefSU)
1147  continue;
1148 
1149  unsigned LiveOutHeight = DefSU->getHeight();
1150  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1151  // Visit all local users of the vreg def.
1152     for (VReg2UseMap::iterator
1153            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
1154  if (UI->SU == &ExitSU)
1155  continue;
1156 
1157  // Only consider uses of the phi.
1158  LiveQueryResult LRQ =
1159  LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
1160  if (!LRQ.valueIn()->isPHIDef())
1161  continue;
1162 
1163  // Assume that a path spanning two iterations is a cycle, which could
1164  // overestimate in strange cases. This allows cyclic latency to be
1165  // estimated as the minimum slack of the vreg's depth or height.
1166  unsigned CyclicLatency = 0;
1167  if (LiveOutDepth > UI->SU->getDepth())
1168  CyclicLatency = LiveOutDepth - UI->SU->getDepth();
1169 
1170  unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
1171  if (LiveInHeight > LiveOutHeight) {
1172  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1173  CyclicLatency = LiveInHeight - LiveOutHeight;
1174  }
1175  else
1176  CyclicLatency = 0;
1177 
1178  DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1179  << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
1180  if (CyclicLatency > MaxCyclicLatency)
1181  MaxCyclicLatency = CyclicLatency;
1182  }
1183  }
1184  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1185  return MaxCyclicLatency;
1186 }
1187 
1188 /// Move an instruction and update register pressure.
1189 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1190  // Move the instruction to its new location in the instruction stream.
1191  MachineInstr *MI = SU->getInstr();
1192 
1193  if (IsTopNode) {
1194  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1195  if (&*CurrentTop == MI)
1196       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1197     else {
1198       moveInstruction(MI, CurrentTop);
1199       TopRPTracker.setPos(MI);
1200  }
1201 
1202  if (ShouldTrackPressure) {
1203  // Update top scheduled pressure.
1204       TopRPTracker.advance();
1205       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1206       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1207     }
1208  }
1209  else {
1210  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1211  MachineBasicBlock::iterator priorII =
1212       priorNonDebug(CurrentBottom, CurrentTop);
1213     if (&*priorII == MI)
1214  CurrentBottom = priorII;
1215  else {
1216  if (&*CurrentTop == MI) {
1217  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1218         TopRPTracker.setPos(CurrentTop);
1219       }
1220       moveInstruction(MI, CurrentBottom);
1221       CurrentBottom = MI;
1222  }
1223  if (ShouldTrackPressure) {
1224  // Update bottom scheduled pressure.
1225  SmallVector<unsigned, 8> LiveUses;
1226  BotRPTracker.recede(&LiveUses);
1227  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1228       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1229       updatePressureDiffs(LiveUses);
1230  }
1231  }
1232 }
1233 
1234 //===----------------------------------------------------------------------===//
1235 // LoadClusterMutation - DAG post-processing to cluster loads.
1236 //===----------------------------------------------------------------------===//
1237 
1238 namespace {
1239 /// \brief Post-process the DAG to create cluster edges between neighboring
1240 /// loads.
1241 class LoadClusterMutation : public ScheduleDAGMutation {
1242  struct LoadInfo {
1243  SUnit *SU;
1244  unsigned BaseReg;
1245  unsigned Offset;
1246  LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
1247  : SU(su), BaseReg(reg), Offset(ofs) {}
1248 
1249  bool operator<(const LoadInfo &RHS) const {
1250  return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
1251  }
1252  };
1253 
1254  const TargetInstrInfo *TII;
1255  const TargetRegisterInfo *TRI;
1256 public:
1257  LoadClusterMutation(const TargetInstrInfo *tii,
1258  const TargetRegisterInfo *tri)
1259  : TII(tii), TRI(tri) {}
1260 
1261  void apply(ScheduleDAGMI *DAG) override;
1262 protected:
1263  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
1264 };
1265 } // anonymous
1266 
1267 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1268  ScheduleDAGMI *DAG) {
1269   SmallVector<LoadInfo,32> LoadRecords;
1270   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1271  SUnit *SU = Loads[Idx];
1272  unsigned BaseReg;
1273  unsigned Offset;
1274  if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1275  LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1276  }
1277  if (LoadRecords.size() < 2)
1278  return;
1279  std::sort(LoadRecords.begin(), LoadRecords.end());
1280  unsigned ClusterLength = 1;
1281  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1282  if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1283  ClusterLength = 1;
1284  continue;
1285  }
1286 
1287  SUnit *SUa = LoadRecords[Idx].SU;
1288  SUnit *SUb = LoadRecords[Idx+1].SU;
1289  if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1290  && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1291 
1292  DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1293  << SUb->NodeNum << ")\n");
1294  // Copy successor edges from SUa to SUb. Interleaving computation
1295  // dependent on SUa can prevent load combining due to register reuse.
1296  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1297  // loads should have effectively the same inputs.
1298       for (SUnit::const_succ_iterator
1299              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1300  if (SI->getSUnit() == SUb)
1301  continue;
1302  DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1303  DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1304  }
1305  ++ClusterLength;
1306  }
1307  else
1308  ClusterLength = 1;
1309  }
1310 }
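// A small worked example (illustrative): given loads (BaseReg=x0, Offset=0)
// and (BaseReg=x0, Offset=8) on a target whose shouldClusterLoads hook accepts
// the pair, the sort above puts them adjacent and addEdge gives the second a
// cluster edge on the first, biasing the scheduler to issue them back to back.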
1311 
1312 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1313 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
1314   // Map DAG NodeNum to store chain ID.
1315  DenseMap<unsigned, unsigned> StoreChainIDs;
1316  // Map each store chain to a set of dependent loads.
1317  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1318  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1319  SUnit *SU = &DAG->SUnits[Idx];
1320  if (!SU->getInstr()->mayLoad())
1321  continue;
1322  unsigned ChainPredID = DAG->SUnits.size();
1323     for (SUnit::const_pred_iterator
1324            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1325  if (PI->isCtrl()) {
1326  ChainPredID = PI->getSUnit()->NodeNum;
1327  break;
1328  }
1329  }
1330  // Check if this chain-like pred has been seen
1331  // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1332  unsigned NumChains = StoreChainDependents.size();
1333  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1334  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1335  if (Result.second)
1336  StoreChainDependents.resize(NumChains + 1);
1337  StoreChainDependents[Result.first->second].push_back(SU);
1338  }
1339  // Iterate over the store chains.
1340  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1341  clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1342 }
1343 
1344 //===----------------------------------------------------------------------===//
1345 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1346 //===----------------------------------------------------------------------===//
1347 
1348 namespace {
1349 /// \brief Post-process the DAG to create cluster edges between instructions
1350 /// that may be fused by the processor into a single operation.
1351 class MacroFusion : public ScheduleDAGMutation {
1352  const TargetInstrInfo *TII;
1353 public:
1354  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
1355 
1356  void apply(ScheduleDAGMI *DAG) override;
1357 };
1358 } // anonymous
1359 
1360 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1361 /// fused operations.
1362 void MacroFusion::apply(ScheduleDAGMI *DAG) {
1363  // For now, assume targets can only fuse with the branch.
1364  MachineInstr *Branch = DAG->ExitSU.getInstr();
1365  if (!Branch)
1366  return;
1367 
1368  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1369  SUnit *SU = &DAG->SUnits[--Idx];
1370  if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1371  continue;
1372 
1373  // Create a single weak edge from SU to ExitSU. The only effect is to cause
1374  // bottom-up scheduling to heavily prioritize the clustered SU. There is no
1375  // need to copy predecessor edges from ExitSU to SU, since top-down
1376  // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1377  // of SU, we could create an artificial edge from the deepest root, but it
1378  // hasn't been needed yet.
1379  bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1380  (void)Success;
1381  assert(Success && "No DAG nodes should be reachable from ExitSU");
1382 
1383  DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1384  break;
1385  }
1386 }
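// For example (illustrative): a target's shouldScheduleAdjacent hook may
// accept a compare followed by the conditional branch, so the weak edge above
// keeps the pair adjacent and eligible for macro-op fusion in the decoder.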
1387 
1388 //===----------------------------------------------------------------------===//
1389 // CopyConstrain - DAG post-processing to encourage copy elimination.
1390 //===----------------------------------------------------------------------===//
1391 
1392 namespace {
1393 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1394 /// the one use that defines the copy's source vreg, most likely an induction
1395 /// variable increment.
1396 class CopyConstrain : public ScheduleDAGMutation {
1397  // Transient state.
1398  SlotIndex RegionBeginIdx;
1399  // RegionEndIdx is the slot index of the last non-debug instruction in the
1400  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1401  SlotIndex RegionEndIdx;
1402 public:
1403  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1404 
1405  void apply(ScheduleDAGMI *DAG) override;
1406 
1407 protected:
1408  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1409 };
1410 } // anonymous
1411 
1412 /// constrainLocalCopy handles two possibilities:
1413 /// 1) Local src:
1414 /// I0: = dst
1415 /// I1: src = ...
1416 /// I2: = dst
1417 /// I3: dst = src (copy)
1418 /// (create pred->succ edges I0->I1, I2->I1)
1419 ///
1420 /// 2) Local copy:
1421 /// I0: dst = src (copy)
1422 /// I1: = dst
1423 /// I2: src = ...
1424 /// I3: = dst
1425 /// (create pred->succ edges I1->I2, I3->I2)
1426 ///
1427 /// Although the MachineScheduler is currently constrained to single blocks,
1428 /// this algorithm should handle extended blocks. An EBB is a set of
1429 /// contiguously numbered blocks such that the previous block in the EBB is
1430 /// always the single predecessor.
1431 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1432  LiveIntervals *LIS = DAG->getLIS();
1433  MachineInstr *Copy = CopySU->getInstr();
1434 
1435  // Check for pure vreg copies.
1436  unsigned SrcReg = Copy->getOperand(1).getReg();
1437   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1438     return;
1439 
1440  unsigned DstReg = Copy->getOperand(0).getReg();
1441   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
1442     return;
1443 
1444  // Check if either the dest or source is local. If it's live across a back
1445  // edge, it's not local. Note that if both vregs are live across the back
1446   // edge, we cannot successfully constrain the copy without cyclic scheduling.
1447  // If both the copy's source and dest are local live intervals, then we
1448  // should treat the dest as the global for the purpose of adding
1449  // constraints. This adds edges from source's other uses to the copy.
1450  unsigned LocalReg = SrcReg;
1451  unsigned GlobalReg = DstReg;
1452  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1453  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1454  LocalReg = DstReg;
1455  GlobalReg = SrcReg;
1456  LocalLI = &LIS->getInterval(LocalReg);
1457  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1458  return;
1459  }
1460  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1461 
1462  // Find the global segment after the start of the local LI.
1463  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1464  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1465  // local live range. We could create edges from other global uses to the local
1466  // start, but the coalescer should have already eliminated these cases, so
1467  // don't bother dealing with it.
1468  if (GlobalSegment == GlobalLI->end())
1469  return;
1470 
1471  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1472  // returned the next global segment. But if GlobalSegment overlaps with
1473   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1474  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1475  if (GlobalSegment->contains(LocalLI->beginIndex()))
1476  ++GlobalSegment;
1477 
1478  if (GlobalSegment == GlobalLI->end())
1479  return;
1480 
1481  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1482  if (GlobalSegment != GlobalLI->begin()) {
1483  // Two address defs have no hole.
1484  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1485  GlobalSegment->start)) {
1486  return;
1487  }
1488  // If the prior global segment may be defined by the same two-address
1489   // instruction that also defines LocalLI, then we cannot make a hole here.
1490  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1491  LocalLI->beginIndex())) {
1492  return;
1493  }
1494  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1495  // it would be a disconnected component in the live range.
1496  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1497  "Disconnected LRG within the scheduling region.");
1498  }
1499  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1500  if (!GlobalDef)
1501  return;
1502 
1503  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1504  if (!GlobalSU)
1505  return;
1506 
1507  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1508  // constraining the uses of the last local def to precede GlobalDef.
1509  SmallVector<SUnit*,8> LocalUses;
1510  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1511  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1512  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1513   for (SUnit::const_succ_iterator
1514          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1515  I != E; ++I) {
1516  if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1517  continue;
1518  if (I->getSUnit() == GlobalSU)
1519  continue;
1520  if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1521  return;
1522  LocalUses.push_back(I->getSUnit());
1523  }
1524  // Open the top of the GlobalLI hole by constraining any earlier global uses
1525  // to precede the start of LocalLI.
1526  SmallVector<SUnit*,8> GlobalUses;
1527  MachineInstr *FirstLocalDef =
1528  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1529  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1530   for (SUnit::const_pred_iterator
1531          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1532  if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1533  continue;
1534  if (I->getSUnit() == FirstLocalSU)
1535  continue;
1536  if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1537  return;
1538  GlobalUses.push_back(I->getSUnit());
1539  }
1540  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1541  // Add the weak edges.
1542   for (SmallVectorImpl<SUnit*>::const_iterator
1543          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1544  DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1545  << GlobalSU->NodeNum << ")\n");
1546  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1547  }
1548   for (SmallVectorImpl<SUnit*>::const_iterator
1549          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1550  DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1551  << FirstLocalSU->NodeNum << ")\n");
1552  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1553  }
1554 }
1555 
1556 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1557 /// copy elimination.
1558 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1559   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1560 
1561  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1562  if (FirstPos == DAG->end())
1563  return;
1564  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1565  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1566  &*priorNonDebug(DAG->end(), DAG->begin()));
1567 
1568  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1569  SUnit *SU = &DAG->SUnits[Idx];
1570  if (!SU->getInstr()->isCopy())
1571  continue;
1572 
1573  constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1574  }
1575 }
1576 
1577 //===----------------------------------------------------------------------===//
1578 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1579 // and possibly other custom schedulers.
1580 //===----------------------------------------------------------------------===//
1581 
1582 static const unsigned InvalidCycle = ~0U;
1583 
1584 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1585 
1586 void SchedBoundary::reset() {
1587  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1588   // Destroying and reconstructing it is very expensive though, so keep an
1589   // invalid placeholder HazardRec.
1590  if (HazardRec && HazardRec->isEnabled()) {
1591  delete HazardRec;
1592  HazardRec = nullptr;
1593  }
1594  Available.clear();
1595  Pending.clear();
1596  CheckPending = false;
1597  NextSUs.clear();
1598  CurrCycle = 0;
1599  CurrMOps = 0;
1600  MinReadyCycle = UINT_MAX;
1601  ExpectedLatency = 0;
1602  DependentLatency = 0;
1603  RetiredMOps = 0;
1604  MaxExecutedResCount = 0;
1605  ZoneCritResIdx = 0;
1606  IsResourceLimited = false;
1607  ReservedCycles.clear();
1608 #ifndef NDEBUG
1609  // Track the maximum number of stall cycles that could arise either from the
1610  // latency of a DAG edge or the number of cycles that a processor resource is
1611  // reserved (SchedBoundary::ReservedCycles).
1612  MaxObservedStall = 0;
1613 #endif
1614  // Reserve a zero-count for invalid CritResIdx.
1615  ExecutedResCounts.resize(1);
1616  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1617 }
1618 
1619 void SchedRemainder::
1620 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1621  reset();
1622  if (!SchedModel->hasInstrSchedModel())
1623  return;
1624   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1625   for (std::vector<SUnit>::iterator
1626  I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1627  const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1628  RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1629  * SchedModel->getMicroOpFactor();
1630     for (TargetSchedModel::ProcResIter
1631            PI = SchedModel->getWriteProcResBegin(SC),
1632  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1633  unsigned PIdx = PI->ProcResourceIdx;
1634  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1635  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1636  }
1637  }
1638 }
1639 
1640 void SchedBoundary::
1641 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1642   reset();
1643  DAG = dag;
1644  SchedModel = smodel;
1645  Rem = rem;
1646  if (SchedModel->hasInstrSchedModel()) {
1647  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1648     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1649   }
1650 }
1651 
1652 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1653 /// these "soft stalls" differently than the hard stall cycles based on CPU
1654 /// resources and computed by checkHazard(). A fully in-order model
1655 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1656 /// available for scheduling until they are ready. However, a weaker in-order
1657 /// model may use this for heuristics. For example, if a processor has in-order
1658 /// behavior when reading certain resources, this may come into play.
1659 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1660   if (!SU->isUnbuffered)
1661  return 0;
1662 
1663  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1664  if (ReadyCycle > CurrCycle)
1665  return ReadyCycle - CurrCycle;
1666  return 0;
1667 }
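// The rule above in isolation, as a self-contained sketch; the helper name and
// plain-integer parameters are hypothetical stand-ins, not LLVM API. E.g. an
// unbuffered SU that becomes ready at cycle 7 while the zone sits at cycle 5
// reports a 2-cycle soft stall; any buffered SU reports none.
static unsigned modelLatencyStall(bool IsUnbuffered, unsigned ReadyCycle,
                                  unsigned CurrCycle) {
  if (!IsUnbuffered)
    return 0; // buffered resources hide the latency
  return ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
}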
1668 
1669 /// Compute the next cycle at which the given processor resource can be
1670 /// scheduled.
1671 unsigned SchedBoundary::
1672 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1673  unsigned NextUnreserved = ReservedCycles[PIdx];
1674  // If this resource has never been used, always return cycle zero.
1675  if (NextUnreserved == InvalidCycle)
1676  return 0;
1677  // For bottom-up scheduling add the cycles needed for the current operation.
1678  if (!isTop())
1679  NextUnreserved += Cycles;
1680  return NextUnreserved;
1681 }
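// A minimal model of the two cases above (hypothetical helper, not LLVM API):
// a never-used resource is free at cycle zero; otherwise the bottom-up
// direction also charges the current operation's own cycles, because the new
// instruction is placed before everything already scheduled.
static unsigned modelNextResourceCycle(unsigned NextUnreserved, bool IsTop,
                                       unsigned Cycles,
                                       unsigned Invalid = ~0U) {
  if (NextUnreserved == Invalid)
    return 0; // resource never used in this zone
  return IsTop ? NextUnreserved : NextUnreserved + Cycles;
}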
1682 
1683 /// Does this SU have a hazard within the current instruction group.
1684 ///
1685 /// The scheduler supports two modes of hazard recognition. The first is the
1686 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1687 /// supports highly complicated in-order reservation tables
1688 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1689 ///
1690 /// The second is a streamlined mechanism that checks for hazards based on
1691 /// simple counters that the scheduler itself maintains. It explicitly checks
1692 /// for instruction dispatch limitations, including the number of micro-ops that
1693 /// can dispatch per cycle.
1694 ///
1695 /// TODO: Also check whether the SU must start a new group.
1696 bool SchedBoundary::checkHazard(SUnit *SU) {
1697  if (HazardRec->isEnabled()
1698  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1699  return true;
1700  }
1701  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1702  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1703  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1704  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1705  return true;
1706  }
1707  if (SU->hasReservedResource) {
1708  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1709  for (TargetSchedModel::ProcResIter
1710  PI = SchedModel->getWriteProcResBegin(SC),
1711  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1712  unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
1713  if (NRCycle > CurrCycle) {
1714 #ifndef NDEBUG
1715  MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
1716 #endif
1717  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1718  << SchedModel->getResourceName(PI->ProcResourceIdx)
1719  << "=" << NRCycle << "c\n");
1720  return true;
1721  }
1722  }
1723  }
1724  return false;
1725 }
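// The streamlined counter check above, reduced to its core (hypothetical
// helper, not LLVM API). With IssueWidth = 4, CurrMOps = 3 and a 2-uop
// candidate, 3 + 2 > 4 flags a hazard, so the candidate waits for the next
// cycle; a candidate issued into an empty group (CurrMOps == 0) never does.
static bool modelIssueWidthHazard(unsigned CurrMOps, unsigned UOps,
                                  unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
}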
1726 
1727 // Find the unscheduled node in ReadySUs with the highest latency.
1728 unsigned SchedBoundary::
1729 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1730  SUnit *LateSU = nullptr;
1731  unsigned RemLatency = 0;
1732  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1733  I != E; ++I) {
1734  unsigned L = getUnscheduledLatency(*I);
1735  if (L > RemLatency) {
1736  RemLatency = L;
1737  LateSU = *I;
1738  }
1739  }
1740  if (LateSU) {
1741  DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1742  << LateSU->NodeNum << ") " << RemLatency << "c\n");
1743  }
1744  return RemLatency;
1745 }
1746 
1747 // Count resources in this zone and the remaining unscheduled
1748 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1749 // resource index, or zero if the zone is issue limited.
1750 unsigned SchedBoundary::
1751 getOtherResourceCount(unsigned &OtherCritIdx) {
1752  OtherCritIdx = 0;
1753  if (!SchedModel->hasInstrSchedModel())
1754  return 0;
1755 
1756  unsigned OtherCritCount = Rem->RemIssueCount
1757  + (RetiredMOps * SchedModel->getMicroOpFactor());
1758  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
1759  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1760  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1761  PIdx != PEnd; ++PIdx) {
1762  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1763  if (OtherCount > OtherCritCount) {
1764  OtherCritCount = OtherCount;
1765  OtherCritIdx = PIdx;
1766  }
1767  }
1768  if (OtherCritIdx) {
1769  DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
1770  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1771  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1772  }
1773  return OtherCritCount;
1774 }
1775 
1776 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1777  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1778 
1779 #ifndef NDEBUG
1780  // ReadyCycle has been bumped up to the CurrCycle when this node was
1781  // scheduled, but CurrCycle may have been eagerly advanced immediately after
1782  // scheduling, so may now be greater than ReadyCycle.
1783  if (ReadyCycle > CurrCycle)
1784  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
1785 #endif
1786 
1787  if (ReadyCycle < MinReadyCycle)
1788  MinReadyCycle = ReadyCycle;
1789 
1790  // Check for interlocks first. For the purpose of other heuristics, an
1791  // instruction that cannot issue appears as if it's not in the ReadyQueue.
1792  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1793  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
1794  Pending.push(SU);
1795  else
1796  Available.push(SU);
1797 
1798  // Record this node as an immediate dependent of the scheduled node.
1799  NextSUs.insert(SU);
1800 }
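// The routing decision above as a standalone predicate (hypothetical helper,
// not LLVM API): a node goes to Pending, invisible to the heuristics, when an
// in-order model says it is not ready yet, or when issuing it now would
// hazard; otherwise it is immediately available.
static bool modelGoesToPending(bool IsBuffered, unsigned ReadyCycle,
                               unsigned CurrCycle, bool HasHazard) {
  return (!IsBuffered && ReadyCycle > CurrCycle) || HasHazard;
}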
1801 
1802 void SchedBoundary::releaseTopNode(SUnit *SU) {
1803  if (SU->isScheduled)
1804  return;
1805 
1806  releaseNode(SU, SU->TopReadyCycle);
1807 }
1808 
1809 void SchedBoundary::releaseBottomNode(SUnit *SU) {
1810  if (SU->isScheduled)
1811  return;
1812 
1813  releaseNode(SU, SU->BotReadyCycle);
1814 }
1815 
1816 /// Move the boundary of scheduled code by one cycle.
1817 void SchedBoundary::bumpCycle(unsigned NextCycle) {
1818  if (SchedModel->getMicroOpBufferSize() == 0) {
1819  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1820  if (MinReadyCycle > NextCycle)
1821  NextCycle = MinReadyCycle;
1822  }
1823  // Update the current micro-ops, which will issue in the next cycle.
1824  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
1825  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
1826 
1827  // Decrement DependentLatency based on the next cycle.
1828  if ((NextCycle - CurrCycle) > DependentLatency)
1829  DependentLatency = 0;
1830  else
1831  DependentLatency -= (NextCycle - CurrCycle);
1832 
1833  if (!HazardRec->isEnabled()) {
1834  // Bypass HazardRec virtual calls.
1835  CurrCycle = NextCycle;
1836  }
1837  else {
1838  // Bypass getHazardType calls in case of long latency.
1839  for (; CurrCycle != NextCycle; ++CurrCycle) {
1840  if (isTop())
1841  HazardRec->AdvanceCycle();
1842  else
1843  HazardRec->RecedeCycle();
1844  }
1845  }
1846  CheckPending = true;
1847  unsigned LFactor = SchedModel->getLatencyFactor();
1848  IsResourceLimited =
1849  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1850  > (int)LFactor;
1851 
1852  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
1853 }
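// The resource-limited test above, isolated (hypothetical helper, not LLVM
// API). Counts are kept in scaled units, so latency is multiplied by the
// latency factor before comparing; the zone counts as resource limited only
// when its critical resource leads the scheduled latency by more than one
// full cycle's worth of work.
static bool modelResourceLimited(unsigned CriticalCount,
                                 unsigned ScheduledLatency,
                                 unsigned LFactor) {
  return (int)(CriticalCount - ScheduledLatency * LFactor) > (int)LFactor;
}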
1854 
1855 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
1856  ExecutedResCounts[PIdx] += Count;
1857  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
1858  MaxExecutedResCount = ExecutedResCounts[PIdx];
1859 }
1860 
1861 /// Add the given processor resource to this scheduled zone.
1862 ///
1863 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
1864 /// during which this resource is consumed.
1865 ///
1866 /// \return the next cycle at which the instruction may execute without
1867 /// oversubscribing resources.
1868 unsigned SchedBoundary::
1869 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
1870  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1871  unsigned Count = Factor * Cycles;
1872  DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
1873  << " +" << Cycles << "x" << Factor << "u\n");
1874 
1875  // Update Executed resources counts.
1876  incExecutedResources(PIdx, Count);
1877  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1878  Rem->RemainingCounts[PIdx] -= Count;
1879 
1880  // Check if this resource exceeds the current critical resource. If so, it
1881  // becomes the critical resource.
1882  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
1883  ZoneCritResIdx = PIdx;
1884  DEBUG(dbgs() << " *** Critical resource "
1885  << SchedModel->getResourceName(PIdx) << ": "
1886  << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
1887  }
1888  // For reserved resources, record the highest cycle using the resource.
1889  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
1890  if (NextAvailable > CurrCycle) {
1891  DEBUG(dbgs() << " Resource conflict: "
1892  << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
1893  << NextAvailable << "\n");
1894  }
1895  return NextAvailable;
1896 }
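// Why counts are scaled (a sketch with hypothetical numbers, not LLVM API):
// a resource with two units gets half the factor of a single-unit resource,
// so one cycle on one of its units contributes fewer scaled units and
// differently sized resources stay directly comparable.
static unsigned modelScaledResourceCount(unsigned Factor, unsigned Cycles) {
  // Dividing the running total by the latency factor converts it back into
  // cycles, which is how the DEBUG output above reports it.
  return Factor * Cycles;
}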
1897 
1898 /// Move the boundary of scheduled code by one SUnit.
1899 void SchedBoundary::bumpNode(SUnit *SU) {
1900  // Update the reservation table.
1901  if (HazardRec->isEnabled()) {
1902  if (!isTop() && SU->isCall) {
1903  // Calls are scheduled with their preceding instructions. For bottom-up
1904  // scheduling, clear the pipeline state before emitting.
1905  HazardRec->Reset();
1906  }
1907  HazardRec->EmitInstruction(SU);
1908  }
1909  // checkHazard should prevent scheduling multiple instructions per cycle that
1910  // exceed the issue width.
1911  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1912  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
1913  assert(
1914  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
1915  "Cannot schedule this instruction's MicroOps in the current cycle.");
1916 
1917  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1918  DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
1919 
1920  unsigned NextCycle = CurrCycle;
1921  switch (SchedModel->getMicroOpBufferSize()) {
1922  case 0:
1923  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
1924  break;
1925  case 1:
1926  if (ReadyCycle > NextCycle) {
1927  NextCycle = ReadyCycle;
1928  DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
1929  }
1930  break;
1931  default:
1932  // We don't currently model the OOO reorder buffer, so consider all
1933  // scheduled MOps to be "retired". We do loosely model in-order resource
1934  // latency. If this instruction uses an in-order resource, account for any
1935  // likely stall cycles.
1936  if (SU->isUnbuffered && ReadyCycle > NextCycle)
1937  NextCycle = ReadyCycle;
1938  break;
1939  }
1940  RetiredMOps += IncMOps;
1941 
1942  // Update resource counts and critical resource.
1943  if (SchedModel->hasInstrSchedModel()) {
1944  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
1945  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
1946  Rem->RemIssueCount -= DecRemIssue;
1947  if (ZoneCritResIdx) {
1948  // Scale scheduled micro-ops for comparing with the critical resource.
1949  unsigned ScaledMOps =
1950  RetiredMOps * SchedModel->getMicroOpFactor();
1951 
1952  // If scaled micro-ops are now more than the previous critical resource by
1953  // a full cycle, then micro-ops issue becomes critical.
1954  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
1955  >= (int)SchedModel->getLatencyFactor()) {
1956  ZoneCritResIdx = 0;
1957  DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
1958  << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
1959  }
1960  }
1961  for (TargetSchedModel::ProcResIter
1962  PI = SchedModel->getWriteProcResBegin(SC),
1963  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1964  unsigned RCycle =
1965  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
1966  if (RCycle > NextCycle)
1967  NextCycle = RCycle;
1968  }
1969  if (SU->hasReservedResource) {
1970  // For reserved resources, record the highest cycle using the resource.
1971  // For top-down scheduling, this is the cycle in which we schedule this
1972  // instruction plus the number of cycles the operation reserves the
1973  // resource. For bottom-up, it is simply the instruction's cycle.
1974  for (TargetSchedModel::ProcResIter
1975  PI = SchedModel->getWriteProcResBegin(SC),
1976  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1977  unsigned PIdx = PI->ProcResourceIdx;
1978  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
1979  if (isTop()) {
1980  ReservedCycles[PIdx] =
1981  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
1982  }
1983  else
1984  ReservedCycles[PIdx] = NextCycle;
1985  }
1986  }
1987  }
1988  }
1989  // Update ExpectedLatency and DependentLatency.
1990  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1991  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1992  if (SU->getDepth() > TopLatency) {
1993  TopLatency = SU->getDepth();
1994  DEBUG(dbgs() << " " << Available.getName()
1995  << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
1996  }
1997  if (SU->getHeight() > BotLatency) {
1998  BotLatency = SU->getHeight();
1999  DEBUG(dbgs() << " " << Available.getName()
2000  << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2001  }
2002  // If we stall for any reason, bump the cycle.
2003  if (NextCycle > CurrCycle) {
2004  bumpCycle(NextCycle);
2005  }
2006  else {
2007  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2008  // resource limited. If a stall occurred, bumpCycle does this.
2009  unsigned LFactor = SchedModel->getLatencyFactor();
2010  IsResourceLimited =
2011  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2012  > (int)LFactor;
2013  }
2014  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2015  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2016  // one cycle. Since we commonly reach the max MOps here, opportunistically
2017  // bump the cycle to avoid uselessly checking everything in the readyQ.
2018  CurrMOps += IncMOps;
2019  while (CurrMOps >= SchedModel->getIssueWidth()) {
2020  DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2021  << " at cycle " << CurrCycle << '\n');
2022  bumpCycle(++NextCycle);
2023  }
2023  }
2024  DEBUG(dumpScheduledState());
2025 }
2026 
2027 /// Release pending ready nodes in to the available queue. This makes them
2028 /// visible to heuristics.
2029 void SchedBoundary::releasePending() {
2030  // If the available queue is empty, it is safe to reset MinReadyCycle.
2031  if (Available.empty())
2032  MinReadyCycle = UINT_MAX;
2033 
2034  // Check to see if any of the pending instructions are ready to issue. If
2035  // so, add them to the available queue.
2036  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2037  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2038  SUnit *SU = *(Pending.begin()+i);
2039  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2040 
2041  if (ReadyCycle < MinReadyCycle)
2042  MinReadyCycle = ReadyCycle;
2043 
2044  if (!IsBuffered && ReadyCycle > CurrCycle)
2045  continue;
2046 
2047  if (checkHazard(SU))
2048  continue;
2049 
2050  Available.push(SU);
2051  Pending.remove(Pending.begin()+i);
2052  --i; --e;
2053  }
2054  DEBUG(if (!Pending.empty()) Pending.dump());
2055  CheckPending = false;
2056 }
2057 
2058 /// Remove SU from the ready set for this boundary.
2059 void SchedBoundary::removeReady(SUnit *SU) {
2060  if (Available.isInQueue(SU))
2061  Available.remove(Available.find(SU));
2062  else {
2063  assert(Pending.isInQueue(SU) && "bad ready count");
2064  Pending.remove(Pending.find(SU));
2065  }
2066 }
2067 
2068 /// If this queue only has one ready candidate, return it. As a side effect,
2069 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2070 /// one node is ready. If multiple instructions are ready, return NULL.
2071 SUnit *SchedBoundary::pickOnlyChoice() {
2072  if (CheckPending)
2073  releasePending();
2074 
2075  if (CurrMOps > 0) {
2076  // Defer any ready instrs that now have a hazard.
2077  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2078  if (checkHazard(*I)) {
2079  Pending.push(*I);
2080  I = Available.remove(I);
2081  continue;
2082  }
2083  ++I;
2084  }
2085  }
2086  for (unsigned i = 0; Available.empty(); ++i) {
2087 // FIXME: Re-enable assert once PR20057 is resolved.
2088 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2089 // "permanent hazard");
2090  (void)i;
2091  bumpCycle(CurrCycle + 1);
2092  releasePending();
2093  }
2094  if (Available.size() == 1)
2095  return *Available.begin();
2096  return nullptr;
2097 }
2098 
2099 #ifndef NDEBUG
2100 // This is useful information to dump after bumpNode.
2101 // Note that the Queue contents are more useful before pickNodeFromQueue.
2102 void SchedBoundary::dumpScheduledState() {
2103  unsigned ResFactor;
2104  unsigned ResCount;
2105  if (ZoneCritResIdx) {
2106  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2107  ResCount = getResourceCount(ZoneCritResIdx);
2108  }
2109  else {
2110  ResFactor = SchedModel->getMicroOpFactor();
2111  ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2112  }
2113  unsigned LFactor = SchedModel->getLatencyFactor();
2114  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2115  << " Retired: " << RetiredMOps;
2116  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2117  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2118  << ResCount / ResFactor << " "
2119  << SchedModel->getResourceName(ZoneCritResIdx)
2120  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2121  << (IsResourceLimited ? " - Resource" : " - Latency")
2122  << " limited.\n";
2123 }
2124 #endif
2125 
2126 //===----------------------------------------------------------------------===//
2127 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2128 //===----------------------------------------------------------------------===//
2129 
2130 void GenericSchedulerBase::SchedCandidate::
2131 initResourceDelta(const ScheduleDAGMI *DAG,
2132  const TargetSchedModel *SchedModel) {
2133  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2134  return;
2135 
2136  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2137  for (TargetSchedModel::ProcResIter
2138  PI = SchedModel->getWriteProcResBegin(SC),
2139  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2140  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2141  ResDelta.CritResources += PI->Cycles;
2142  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2143  ResDelta.DemandedResources += PI->Cycles;
2144  }
2145 }
2146 
2147 /// Set the CandPolicy given a scheduling zone given the current resources and
2148 /// latencies inside and outside the zone.
2149 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
2150  bool IsPostRA,
2151  SchedBoundary &CurrZone,
2152  SchedBoundary *OtherZone) {
2153  // Apply preemptive heuristics based on the total latency and resources
2154  // inside and outside this zone. Potential stalls should be considered before
2155  // following this policy.
2156 
2157  // Compute remaining latency. We need this both to determine whether the
2158  // overall schedule has become latency-limited and whether the instructions
2159  // outside this zone are resource or latency limited.
2160  //
2161  // The "dependent" latency is updated incrementally during scheduling as the
2162  // max height/depth of scheduled nodes minus the cycles since it was
2163  // scheduled:
2164  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2165  //
2166  // The "independent" latency is the max ready queue depth:
2167  // ILat = max N.depth for N in Available|Pending
2168  //
2169  // RemainingLatency is the greater of independent and dependent latency.
2170  unsigned RemLatency = CurrZone.getDependentLatency();
2171  RemLatency = std::max(RemLatency,
2172  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2173  RemLatency = std::max(RemLatency,
2174  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2175 
2176  // Compute the critical resource outside the zone.
2177  unsigned OtherCritIdx = 0;
2178  unsigned OtherCount =
2179  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2180 
2181  bool OtherResLimited = false;
2182  if (SchedModel->hasInstrSchedModel()) {
2183  unsigned LFactor = SchedModel->getLatencyFactor();
2184  OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2185  }
2186  // Schedule aggressively for latency in PostRA mode. We don't check for
2187  // acyclic latency during PostRA, and highly out-of-order processors will
2188  // skip PostRA scheduling.
2189  if (!OtherResLimited) {
2190  if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2191  Policy.ReduceLatency |= true;
2192  DEBUG(dbgs() << " " << CurrZone.Available.getName()
2193  << " RemainingLatency " << RemLatency << " + "
2194  << CurrZone.getCurrCycle() << "c > CritPath "
2195  << Rem.CriticalPath << "\n");
2196  }
2197  }
2198  // If the same resource is limiting inside and outside the zone, do nothing.
2199  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2200  return;
2201 
2202  DEBUG(
2203  if (CurrZone.isResourceLimited()) {
2204  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2205  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2206  << "\n";
2207  }
2208  if (OtherResLimited)
2209  dbgs() << " RemainingLimit: "
2210  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2211  if (!CurrZone.isResourceLimited() && !OtherResLimited)
2212  dbgs() << " Latency limited both directions.\n");
2213 
2214  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2215  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2216 
2217  if (OtherResLimited)
2218  Policy.DemandResIdx = OtherCritIdx;
2219 }
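// The "other zone is resource limited" test above, condensed into a
// standalone sketch (hypothetical helper, not LLVM API): the remaining
// latency is scaled into the same units as the resource counts, and the work
// outside the zone counts as resource limited only when it exceeds that
// scaled latency by more than one cycle's worth of work.
static bool modelOtherResLimited(unsigned OtherCount, unsigned RemLatency,
                                 unsigned LFactor) {
  return (int)(OtherCount - RemLatency * LFactor) > (int)LFactor;
}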
2220 
2221 #ifndef NDEBUG
2222 const char *GenericSchedulerBase::getReasonStr(
2223  GenericSchedulerBase::CandReason Reason) {
2224  switch (Reason) {
2225  case NoCand: return "NOCAND ";
2226  case PhysRegCopy: return "PREG-COPY";
2227  case RegExcess: return "REG-EXCESS";
2228  case RegCritical: return "REG-CRIT ";
2229  case Stall: return "STALL ";
2230  case Cluster: return "CLUSTER ";
2231  case Weak: return "WEAK ";
2232  case RegMax: return "REG-MAX ";
2233  case ResourceReduce: return "RES-REDUCE";
2234  case ResourceDemand: return "RES-DEMAND";
2235  case TopDepthReduce: return "TOP-DEPTH ";
2236  case TopPathReduce: return "TOP-PATH ";
2237  case BotHeightReduce:return "BOT-HEIGHT";
2238  case BotPathReduce: return "BOT-PATH ";
2239  case NextDefUse: return "DEF-USE ";
2240  case NodeOrder: return "ORDER ";
2241  };
2242  llvm_unreachable("Unknown reason!");
2243 }
2244 
2245 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2246  PressureChange P;
2247  unsigned ResIdx = 0;
2248  unsigned Latency = 0;
2249  switch (Cand.Reason) {
2250  default:
2251  break;
2252  case RegExcess:
2253  P = Cand.RPDelta.Excess;
2254  break;
2255  case RegCritical:
2256  P = Cand.RPDelta.CriticalMax;
2257  break;
2258  case RegMax:
2259  P = Cand.RPDelta.CurrentMax;
2260  break;
2261  case ResourceReduce:
2262  ResIdx = Cand.Policy.ReduceResIdx;
2263  break;
2264  case ResourceDemand:
2265  ResIdx = Cand.Policy.DemandResIdx;
2266  break;
2267  case TopDepthReduce:
2268  Latency = Cand.SU->getDepth();
2269  break;
2270  case TopPathReduce:
2271  Latency = Cand.SU->getHeight();
2272  break;
2273  case BotHeightReduce:
2274  Latency = Cand.SU->getHeight();
2275  break;
2276  case BotPathReduce:
2277  Latency = Cand.SU->getDepth();
2278  break;
2279  }
2280  dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2281  if (P.isValid())
2282  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2283  << ":" << P.getUnitInc() << " ";
2284  else
2285  dbgs() << " ";
2286  if (ResIdx)
2287  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2288  else
2289  dbgs() << " ";
2290  if (Latency)
2291  dbgs() << " " << Latency << " cycles ";
2292  else
2293  dbgs() << " ";
2294  dbgs() << '\n';
2295 }
2296 #endif
2297 
2298 /// Return true if this heuristic determines order.
2299 static bool tryLess(int TryVal, int CandVal,
2300  GenericSchedulerBase::SchedCandidate &TryCand,
2301  GenericSchedulerBase::SchedCandidate &Cand,
2302  GenericSchedulerBase::CandReason Reason) {
2303  if (TryVal < CandVal) {
2304  TryCand.Reason = Reason;
2305  return true;
2306  }
2307  if (TryVal > CandVal) {
2308  if (Cand.Reason > Reason)
2309  Cand.Reason = Reason;
2310  return true;
2311  }
2312  Cand.setRepeat(Reason);
2313  return false;
2314 }
2315 
2316 static bool tryGreater(int TryVal, int CandVal,
2317  GenericSchedulerBase::SchedCandidate &TryCand,
2318  GenericSchedulerBase::SchedCandidate &Cand,
2319  GenericSchedulerBase::CandReason Reason) {
2320  if (TryVal > CandVal) {
2321  TryCand.Reason = Reason;
2322  return true;
2323  }
2324  if (TryVal < CandVal) {
2325  if (Cand.Reason > Reason)
2326  Cand.Reason = Reason;
2327  return true;
2328  }
2329  Cand.setRepeat(Reason);
2330  return false;
2331 }
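// Usage sketch for the two helpers above (hypothetical values): lower
// register excess wins. A strict win or loss decides immediately and records
// the reason; a tie marks the reason as repeated and falls through so the
// next heuristic in tryCandidate can break it.
static bool modelTieBreak(GenericSchedulerBase::SchedCandidate &TryCand,
                          GenericSchedulerBase::SchedCandidate &Cand) {
  if (tryLess(/*TryVal=*/1, /*CandVal=*/2, TryCand, Cand,
              GenericSchedulerBase::RegExcess))
    return true; // decided: 1 < 2, so TryCand.Reason == RegExcess
  return false;  // only reached on a tie
}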
2332 
2333 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2334  GenericSchedulerBase::SchedCandidate &Cand,
2335  SchedBoundary &Zone) {
2336  if (Zone.isTop()) {
2337  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2338  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2339  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2340  return true;
2341  }
2342  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2343  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2344  return true;
2345  }
2346  else {
2347  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2348  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2349  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2350  return true;
2351  }
2352  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2353  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2354  return true;
2355  }
2356  return false;
2357 }
2358 
2359 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
2360  bool IsTop) {
2361  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2362  << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
2363 }
2364 
2366  assert(dag->hasVRegLiveness() &&
2367  "(PreRA)GenericScheduler needs vreg liveness");
2368  DAG = static_cast<ScheduleDAGMILive*>(dag);
2369  SchedModel = DAG->getSchedModel();
2370  TRI = DAG->TRI;
2371 
2372  Rem.init(DAG, SchedModel);
2373  Top.init(DAG, SchedModel, &Rem);
2374  Bot.init(DAG, SchedModel, &Rem);
2375 
2376  // Initialize resource counts.
2377 
2378  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2379  // are disabled, then these HazardRecs will be disabled.
2380  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2381  if (!Top.HazardRec) {
2382  Top.HazardRec =
2383  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2384  Itin, DAG);
2385  }
2386  if (!Bot.HazardRec) {
2387  Bot.HazardRec =
2388  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2389  Itin, DAG);
2390  }
2391 }
2392 
2393 /// Initialize the per-region scheduling policy.
2394 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2395  MachineBasicBlock::iterator End,
2396  unsigned NumRegionInstrs) {
2397  const MachineFunction &MF = *Begin->getParent()->getParent();
2398  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2399 
2400  // Avoid setting up the register pressure tracker for small regions to save
2401  // compile time. As a rough heuristic, only track pressure when the number of
2402  // schedulable instructions exceeds half the integer register file.
2403  RegionPolicy.ShouldTrackPressure = true;
2404  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2405  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2406  if (TLI->isTypeLegal(LegalIntVT)) {
2407  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2408  TLI->getRegClassFor(LegalIntVT));
2409  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2410  }
2411  }
2412 
2413  // For generic targets, we default to bottom-up, because it's simpler and more
2414  // compile-time optimizations have been implemented in that direction.
2415  RegionPolicy.OnlyBottomUp = true;
2416 
2417  // Allow the subtarget to override default policy.
2418  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
2419  NumRegionInstrs);
2420 
2421  // After subtarget overrides, apply command line options.
2422  if (!EnableRegPressure)
2423  RegionPolicy.ShouldTrackPressure = false;
2424 
2425  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2426  // e.g. -misched-bottomup=false allows scheduling in both directions.
2427  assert((!ForceTopDown || !ForceBottomUp) &&
2428  "-misched-topdown incompatible with -misched-bottomup");
2429  if (ForceBottomUp.getNumOccurrences() > 0) {
2430  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2431  if (RegionPolicy.OnlyBottomUp)
2432  RegionPolicy.OnlyTopDown = false;
2433  }
2434  if (ForceTopDown.getNumOccurrences() > 0) {
2435  RegionPolicy.OnlyTopDown = ForceTopDown;
2436  if (RegionPolicy.OnlyTopDown)
2437  RegionPolicy.OnlyBottomUp = false;
2438  }
2439 }
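// The pressure-tracking threshold above, with hypothetical numbers: given 32
// allocatable integer registers, a 20-instruction region (20 > 32/2) enables
// the tracker, while a 10-instruction region skips it to save compile time.
static bool modelShouldTrackPressure(unsigned NumRegionInstrs,
                                     unsigned NIntRegs) {
  return NumRegionInstrs > NIntRegs / 2;
}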
2440 
2441 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2442 /// critical path by more cycles than it takes to drain the instruction buffer.
2443 /// We estimate an upper bound on in-flight instructions as:
2444 ///
2445 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2446 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2447 /// InFlightResources = InFlightIterations * LoopResources
2448 ///
2449 /// TODO: Check execution resources in addition to IssueCount.
2452  return;
2453 
2454  // Scaled number of cycles per loop iteration.
2455  unsigned IterCount =
2456  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2457  Rem.RemIssueCount);
2458  // Scaled acyclic critical path.
2459  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2460  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2461  unsigned InFlightCount =
2462  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2463  unsigned BufferLimit =
2464  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2465 
2466  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2467 
2468  DEBUG(dbgs() << "IssueCycles="
2469  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2470  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2471  << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2472  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2473  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2474  if (Rem.IsAcyclicLatencyLimited)
2475  dbgs() << " ACYCLIC LATENCY LIMIT\n");
2476 }
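// A worked instance of the estimate above, with hypothetical numbers and the
// unit scaling elided: CyclicPath = 4 cycles/iteration, AcyclicPath = 12
// cycles and 6 micro-ops per iteration give InFlightIterations = 12/4 = 3
// and InFlightResources = 3 * 6 = 18 micro-ops. With MicroOpBufferSize = 16,
// 18 > 16, so the loop is marked acyclic-latency limited and the scheduler
// will aggressively overlap iterations to hide that latency.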
2477 
2478 void GenericScheduler::registerRoots() {
2479  Rem.CriticalPath = DAG->ExitSU.getDepth();
2480 
2481  // Some roots may not feed into ExitSU. Check all of them in case.
2482  for (std::vector<SUnit*>::const_iterator
2483  I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2484  if ((*I)->getDepth() > Rem.CriticalPath)
2485  Rem.CriticalPath = (*I)->getDepth();
2486  }
2487  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2488  if (DumpCriticalPathLength) {
2489  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2490  }
2491 
2492  if (EnableCyclicPath) {
2493  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2494  checkAcyclicLatency();
2495  }
2496 }
2497 
2498 static bool tryPressure(const PressureChange &TryP,
2499  const PressureChange &CandP,
2500  GenericSchedulerBase::SchedCandidate &TryCand,
2501  GenericSchedulerBase::SchedCandidate &Cand,
2502  GenericSchedulerBase::CandReason Reason) {
2503  int TryRank = TryP.getPSetOrMax();
2504  int CandRank = CandP.getPSetOrMax();
2505  // If both candidates affect the same set, go with the smallest increase.
2506  if (TryRank == CandRank) {
2507  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2508  Reason);
2509  }
2510  // If one candidate decreases and the other increases, go with it.
2511  // Invalid candidates have UnitInc==0.
2512  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2513  Reason)) {
2514  return true;
2515  }
2516  // If the candidates are decreasing pressure, reverse priority.
2517  if (TryP.getUnitInc() < 0)
2518  std::swap(TryRank, CandRank);
2519  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2520 }
2521 
2522 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2523  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2524 }
2525 
2526 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2527 /// their physreg def/use.
2528 ///
2529 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2530 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2531 /// with the operation that produces or consumes the physreg. We'll do this when
2532 /// regalloc has support for parallel copies.
2533 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2534  const MachineInstr *MI = SU->getInstr();
2535  if (!MI->isCopy())
2536  return 0;
2537 
2538  unsigned ScheduledOper = isTop ? 1 : 0;
2539  unsigned UnscheduledOper = isTop ? 0 : 1;
2540  // If we have already scheduled the physreg produce/consumer, immediately
2541  // schedule the copy.
2542  if (TargetRegisterInfo::isPhysicalRegister(
2543  MI->getOperand(ScheduledOper).getReg()))
2544  return 1;
2545  // If the physreg is at the boundary, defer it. Otherwise schedule it
2546  // immediately to free the dependent. We can hoist the copy later.
2547  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2548  if (TargetRegisterInfo::isPhysicalRegister(
2549  MI->getOperand(UnscheduledOper).getReg()))
2550  return AtBoundary ? -1 : 1;
2551  return 0;
2552 }
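// The same decision table as above, spelled out (hypothetical helper, not
// LLVM API); tryCandidate below feeds these values to tryGreater, so +1
// schedules the copy now, -1 defers it, and 0 leaves other heuristics in
// charge.
static int modelPhysRegCopyBias(bool IsCopy, bool ScheduledOperIsPhys,
                                bool UnscheduledOperIsPhys, bool AtBoundary) {
  if (!IsCopy)
    return 0;
  if (ScheduledOperIsPhys)
    return 1;                   // partner already placed: copy goes next
  if (UnscheduledOperIsPhys)
    return AtBoundary ? -1 : 1; // defer at the boundary, else free the use
  return 0;
}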
2553 
2554 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2555 /// hierarchical. This may be more efficient than a graduated cost model because
2556 /// we don't need to evaluate all aspects of the model for each node in the
2557 /// queue. But it's really done to make the heuristics easier to debug and
2558 /// statistically analyze.
2559 ///
2560 /// \param Cand provides the policy and current best candidate.
2561 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2562 /// \param Zone describes the scheduled zone that we are extending.
2563 /// \param RPTracker describes reg pressure within the scheduled zone.
2564 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2565 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2566  SchedCandidate &TryCand,
2567  SchedBoundary &Zone,
2568  const RegPressureTracker &RPTracker,
2569  RegPressureTracker &TempTracker) {
2570 
2571  if (DAG->isTrackingPressure()) {
2572  // Always initialize TryCand's RPDelta.
2573  if (Zone.isTop()) {
2574  TempTracker.getMaxDownwardPressureDelta(
2575  TryCand.SU->getInstr(),
2576  TryCand.RPDelta,
2577  DAG->getRegionCriticalPSets(),
2578  DAG->getRegPressure().MaxSetPressure);
2579  }
2580  else {
2581  if (VerifyScheduling) {
2582  TempTracker.getMaxUpwardPressureDelta(
2583  TryCand.SU->getInstr(),
2584  &DAG->getPressureDiff(TryCand.SU),
2585  TryCand.RPDelta,
2586  DAG->getRegionCriticalPSets(),
2587  DAG->getRegPressure().MaxSetPressure);
2588  }
2589  else {
2590  RPTracker.getUpwardPressureDelta(
2591  TryCand.SU->getInstr(),
2592  DAG->getPressureDiff(TryCand.SU),
2593  TryCand.RPDelta,
2594  DAG->getRegionCriticalPSets(),
2595  DAG->getRegPressure().MaxSetPressure);
2596  }
2597  }
2598  }
2599  DEBUG(if (TryCand.RPDelta.Excess.isValid())
2600  dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
2601  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2602  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2603 
2604  // Initialize the candidate if needed.
2605  if (!Cand.isValid()) {
2606  TryCand.Reason = NodeOrder;
2607  return;
2608  }
2609 
2610  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2611  biasPhysRegCopy(Cand.SU, Zone.isTop()),
2612  TryCand, Cand, PhysRegCopy))
2613  return;
2614 
2615  // Avoid exceeding the target's limit.
2616  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2617  Cand.RPDelta.Excess,
2618  TryCand, Cand, RegExcess))
2619  return;
2620 
2621  // Avoid increasing the max critical pressure in the scheduled region.
2622  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2623  Cand.RPDelta.CriticalMax,
2624  TryCand, Cand, RegCritical))
2625  return;
2626 
2627  // For loops that are acyclic path limited, aggressively schedule for latency.
2628  // This can result in very long dependence chains scheduled in sequence, so
2629  // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2630  if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
2631  && tryLatency(TryCand, Cand, Zone))
2632  return;
2633 
2634  // Prioritize instructions that read unbuffered resources by stall cycles.
2635  if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
2636  Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2637  return;
2638 
2639  // Keep clustered nodes together to encourage downstream peephole
2640  // optimizations which may reduce resource requirements.
2641  //
2642  // This is a best effort to set things up for a post-RA pass. Optimizations
2643  // like generating loads of multiple registers should ideally be done within
2644  // the scheduler pass by combining the loads during DAG postprocessing.
2645  const SUnit *NextClusterSU =
2646  Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2647  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2648  TryCand, Cand, Cluster))
2649  return;
2650 
2651  // Weak edges are for clustering and other constraints.
2652  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2653  getWeakLeft(Cand.SU, Zone.isTop()),
2654  TryCand, Cand, Weak)) {
2655  return;
2656  }
2657  // Avoid increasing the max pressure of the entire region.
2658  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2659  Cand.RPDelta.CurrentMax,
2660  TryCand, Cand, RegMax))
2661  return;
2662 
2663  // Avoid critical resource consumption and balance the schedule.
2664  TryCand.initResourceDelta(DAG, SchedModel);
2665  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2666  TryCand, Cand, ResourceReduce))
2667  return;
2668  if (tryGreater(TryCand.ResDelta.DemandedResources,
2669  Cand.ResDelta.DemandedResources,
2670  TryCand, Cand, ResourceDemand))
2671  return;
2672 
2673  // Avoid serializing long latency dependence chains.
2674  // For acyclic path limited loops, latency was already checked above.
2675  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2676  && tryLatency(TryCand, Cand, Zone)) {
2677  return;
2678  }
2679 
2680  // Prefer immediate defs/users of the last scheduled instruction. This is a
2681  // local pressure avoidance strategy that also makes the machine code
2682  // readable.
2683  if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
2684  TryCand, Cand, NextDefUse))
2685  return;
2686 
2687  // Fall through to original instruction order.
2688  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2689  || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2690  TryCand.Reason = NodeOrder;
2691  }
2692 }
2693 
2694 /// Pick the best candidate from the queue.
2695 ///
2696 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2697 /// DAG building. To adjust for the current scheduling location we need to
2698 /// maintain the number of vreg uses remaining to be top-scheduled.
2699 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2700  const RegPressureTracker &RPTracker,
2701  SchedCandidate &Cand) {
2702  ReadyQueue &Q = Zone.Available;
2703 
2704  DEBUG(Q.dump());
2705 
2706  // getMaxPressureDelta temporarily modifies the tracker.
2707  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2708 
2709  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2710 
2711  SchedCandidate TryCand(Cand.Policy);
2712  TryCand.SU = *I;
2713  tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2714  if (TryCand.Reason != NoCand) {
2715  // Initialize resource delta if needed in case future heuristics query it.
2716  if (TryCand.ResDelta == SchedResourceDelta())
2717  TryCand.initResourceDelta(DAG, SchedModel);
2718  Cand.setBest(TryCand);
2719  DEBUG(traceCandidate(Cand));
2720  }
2721  }
2722 }
2723 
2724 /// Pick the best candidate node from either the top or bottom queue.
2725 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2726  // Schedule as far as possible in the direction of no choice. This is most
2727  // efficient, but also provides the best heuristics for CriticalPSets.
2728  if (SUnit *SU = Bot.pickOnlyChoice()) {
2729  IsTopNode = false;
2730  DEBUG(dbgs() << "Pick Bot NOCAND\n");
2731  return SU;
2732  }
2733  if (SUnit *SU = Top.pickOnlyChoice()) {
2734  IsTopNode = true;
2735  DEBUG(dbgs() << "Pick Top NOCAND\n");
2736  return SU;
2737  }
2738  CandPolicy NoPolicy;
2739  SchedCandidate BotCand(NoPolicy);
2740  SchedCandidate TopCand(NoPolicy);
2741  // Set the bottom-up policy based on the state of the current bottom zone and
2742  // the instructions outside the zone, including the top zone.
2743  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
2744  // Set the top-down policy based on the state of the current top zone and
2745  // the instructions outside the zone, including the bottom zone.
2746  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
2747 
2748  // Prefer bottom scheduling when heuristics are silent.
2749  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2750  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2751 
2752  // If either Q has a single candidate that provides the least increase in
2753  // Excess pressure, we can immediately schedule from that Q.
2754  //
2755  // RegionCriticalPSets summarizes the pressure within the scheduled region and
2756  // affects picking from either Q. If scheduling in one direction must
2757  // increase pressure for one of the excess PSets, then schedule in that
2758  // direction first to provide more freedom in the other direction.
2759  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
2760  || (BotCand.Reason == RegCritical
2761  && !BotCand.isRepeat(RegCritical)))
2762  {
2763  IsTopNode = false;
2764  tracePick(BotCand, IsTopNode);
2765  return BotCand.SU;
2766  }
2767  // Check if the top Q has a better candidate.
2768  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2769  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2770 
2771  // Choose the queue with the most important (lowest enum) reason.
2772  if (TopCand.Reason < BotCand.Reason) {
2773  IsTopNode = true;
2774  tracePick(TopCand, IsTopNode);
2775  return TopCand.SU;
2776  }
2777  // Otherwise prefer the bottom candidate, in node order if all else failed.
2778  IsTopNode = false;
2779  tracePick(BotCand, IsTopNode);
2780  return BotCand.SU;
2781 }
2782 
2783 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2784 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
2785  if (DAG->top() == DAG->bottom()) {
2786  assert(Top.Available.empty() && Top.Pending.empty() &&
2787  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2788  return nullptr;
2789  }
2790  SUnit *SU;
2791  do {
2792  if (RegionPolicy.OnlyTopDown) {
2793  SU = Top.pickOnlyChoice();
2794  if (!SU) {
2795  CandPolicy NoPolicy;
2796  SchedCandidate TopCand(NoPolicy);
2797  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2798  assert(TopCand.Reason != NoCand && "failed to find a candidate");
2799  tracePick(TopCand, true);
2800  SU = TopCand.SU;
2801  }
2802  IsTopNode = true;
2803  }
2804  else if (RegionPolicy.OnlyBottomUp) {
2805  SU = Bot.pickOnlyChoice();
2806  if (!SU) {
2807  CandPolicy NoPolicy;
2808  SchedCandidate BotCand(NoPolicy);
2809  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2810  assert(BotCand.Reason != NoCand && "failed to find a candidate");
2811  tracePick(BotCand, false);
2812  SU = BotCand.SU;
2813  }
2814  IsTopNode = false;
2815  }
2816  else {
2817  SU = pickNodeBidirectional(IsTopNode);
2818  }
2819  } while (SU->isScheduled);
2820 
2821  if (SU->isTopReady())
2822  Top.removeReady(SU);
2823  if (SU->isBottomReady())
2824  Bot.removeReady(SU);
2825 
2826  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2827  return SU;
2828 }
2829 
2830 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2831 
2832  MachineBasicBlock::iterator InsertPos = SU->getInstr();
2833  if (!isTop)
2834  ++InsertPos;
2835  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2836 
2837  // Find already scheduled copies with a single physreg dependence and move
2838  // them just above the scheduled instruction.
2839  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2840  I != E; ++I) {
2841  if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2842  continue;
2843  SUnit *DepSU = I->getSUnit();
2844  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2845  continue;
2846  MachineInstr *Copy = DepSU->getInstr();
2847  if (!Copy->isCopy())
2848  continue;
2849  DEBUG(dbgs() << " Rescheduling physreg copy ";
2850  I->getSUnit()->dump(DAG));
2851  DAG->moveInstruction(Copy, InsertPos);
2852  }
2853 }
2854 
2855 /// Update the scheduler's state after scheduling a node. This is the same node
2856 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
2857 /// update its state based on the current cycle before MachineSchedStrategy
2858 /// does.
2859 ///
2860 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2861 /// them here. See comments in biasPhysRegCopy.
2862 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2863  if (IsTopNode) {
2864  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
2865  Top.bumpNode(SU);
2866  if (SU->hasPhysRegUses)
2867  reschedulePhysRegCopies(SU, true);
2868  }
2869  else {
2870  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
2871  Bot.bumpNode(SU);
2872  if (SU->hasPhysRegDefs)
2873  reschedulePhysRegCopies(SU, false);
2874  }
2875 }
2876 
2877 /// Create the standard converging machine scheduler. This will be used as the
2878 /// default scheduler if the target does not set a default.
2879 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
2880  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
2881  // Register DAG post-processors.
2882  //
2883  // FIXME: extend the mutation API to allow earlier mutations to instantiate
2884  // data and pass it to later mutations. Have a single mutation that gathers
2885  // the interesting nodes in one pass.
2886  DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
2887  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
2888  DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
2889  if (EnableMacroFusion)
2890  DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
2891  return DAG;
2892 }
2893 
2894 static MachineSchedRegistry
2895 GenericSchedRegistry("converge", "Standard converging scheduler.",
2896  createGenericSchedLive);
2897 
2898 //===----------------------------------------------------------------------===//
2899 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
2900 //===----------------------------------------------------------------------===//
2901 
2902 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
2903  DAG = Dag;
2904  SchedModel = DAG->getSchedModel();
2905  TRI = DAG->TRI;
2906 
2907  Rem.init(DAG, SchedModel);
2908  Top.init(DAG, SchedModel, &Rem);
2909  BotRoots.clear();
2910 
2911  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
2912  // or are disabled, then these HazardRecs will be disabled.
2913  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2914  if (!Top.HazardRec) {
2915  Top.HazardRec =
2916  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2917  Itin, DAG);
2918  }
2919 }
2920 
2921 
2922 void PostGenericScheduler::registerRoots() {
2923  Rem.CriticalPath = DAG->ExitSU.getDepth();
2924 
2925  // Some roots may not feed into ExitSU. Check all of them in case.
2926  for (SmallVectorImpl<SUnit*>::const_iterator
2927  I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
2928  if ((*I)->getDepth() > Rem.CriticalPath)
2929  Rem.CriticalPath = (*I)->getDepth();
2930  }
2931  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
2932  if (DumpCriticalPathLength) {
2933  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
2934  }
2935 }
2936 
2937 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
2938 ///
2939 /// \param Cand provides the policy and current best candidate.
2940 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2941 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
2942  SchedCandidate &TryCand) {
2943 
2944  // Initialize the candidate if needed.
2945  if (!Cand.isValid()) {
2946  TryCand.Reason = NodeOrder;
2947  return;
2948  }
2949 
2950  // Prioritize instructions that read unbuffered resources by stall cycles.
2951  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
2952  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2953  return;
2954 
2955  // Avoid critical resource consumption and balance the schedule.
2956  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2957  TryCand, Cand, ResourceReduce))
2958  return;
2959  if (tryGreater(TryCand.ResDelta.DemandedResources,
2960  Cand.ResDelta.DemandedResources,
2961  TryCand, Cand, ResourceDemand))
2962  return;
2963 
2964  // Avoid serializing long latency dependence chains.
2965  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
2966  return;
2967  }
2968 
2969  // Fall through to original instruction order.
2970  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
2971  TryCand.Reason = NodeOrder;
2972 }
2973 
2974 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
2975  ReadyQueue &Q = Top.Available;
2976 
2977  DEBUG(Q.dump());
2978 
2979  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2980  SchedCandidate TryCand(Cand.Policy);
2981  TryCand.SU = *I;
2982  TryCand.initResourceDelta(DAG, SchedModel);
2983  tryCandidate(Cand, TryCand);
2984  if (TryCand.Reason != NoCand) {
2985  Cand.setBest(TryCand);
2986  DEBUG(traceCandidate(Cand));
2987  }
2988  }
2989 }
2990 
2991 /// Pick the next node to schedule.
2992 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
2993  if (DAG->top() == DAG->bottom()) {
2994  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
2995  return nullptr;
2996  }
2997  SUnit *SU;
2998  do {
2999  SU = Top.pickOnlyChoice();
3000  if (!SU) {
3001  CandPolicy NoPolicy;
3002  SchedCandidate TopCand(NoPolicy);
3003  // Set the top-down policy based on the state of the current top zone and
3004  // the instructions outside the zone, including the bottom zone.
3005  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3006  pickNodeFromQueue(TopCand);
3007  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3008  tracePick(TopCand, true);
3009  SU = TopCand.SU;
3010  }
3011  } while (SU->isScheduled);
3012 
3013  IsTopNode = true;
3014  Top.removeReady(SU);
3015 
3016  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3017  return SU;
3018 }
3019 
3020 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3021 /// scheduled/remaining flags in the DAG nodes.
3022 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3023  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3024  Top.bumpNode(SU);
3025 }
3026 
3027 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
3028 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
3029  return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
3030 }
3031 
3032 //===----------------------------------------------------------------------===//
3033 // ILP Scheduler. Currently for experimental analysis of heuristics.
3034 //===----------------------------------------------------------------------===//
3035 
3036 namespace {
3037 /// \brief Order nodes by the ILP metric.
3038 struct ILPOrder {
3039  const SchedDFSResult *DFSResult;
3040  const BitVector *ScheduledTrees;
3041  bool MaximizeILP;
3042 
3043  ILPOrder(bool MaxILP)
3044  : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
3045 
3046  /// \brief Apply a less-than relation on node priority.
3047  ///
3048  /// (Return true if A comes after B in the Q.)
3049  bool operator()(const SUnit *A, const SUnit *B) const {
3050  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3051  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3052  if (SchedTreeA != SchedTreeB) {
3053  // Unscheduled trees have lower priority.
3054  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3055  return ScheduledTrees->test(SchedTreeB);
3056 
3057  // Trees with shallower connections have lower priority.
3058  if (DFSResult->getSubtreeLevel(SchedTreeA)
3059  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3060  return DFSResult->getSubtreeLevel(SchedTreeA)
3061  < DFSResult->getSubtreeLevel(SchedTreeB);
3062  }
3063  }
3064  if (MaximizeILP)
3065  return DFSResult->getILP(A) < DFSResult->getILP(B);
3066  else
3067  return DFSResult->getILP(A) > DFSResult->getILP(B);
3068  }
3069 };
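// The comparator above plugs into the standard heap algorithms exactly like
// this self-contained analogue (assuming <algorithm> and <vector> are
// reachable via the headers included at the top of the file):
static int modelPopBest(std::vector<int> &ReadyQ) {
  // pop_heap moves the highest-priority element to the back, mirroring how
  // ILPScheduler::pickNode below pops the best node under ILPOrder.
  std::pop_heap(ReadyQ.begin(), ReadyQ.end());
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  return Best;
}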
3070 
3071 /// \brief Schedule based on the ILP metric.
3072 class ILPScheduler : public MachineSchedStrategy {
3073  ScheduleDAGMILive *DAG;
3074  ILPOrder Cmp;
3075 
3076  std::vector<SUnit*> ReadyQ;
3077 public:
3078  ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
3079 
3080  void initialize(ScheduleDAGMI *dag) override {
3081  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3082  DAG = static_cast<ScheduleDAGMILive*>(dag);
3083  DAG->computeDFSResult();
3084  Cmp.DFSResult = DAG->getDFSResult();
3085  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3086  ReadyQ.clear();
3087  }
3088 
3089  void registerRoots() override {
3090  // Restore the heap in ReadyQ with the updated DFS results.
3091  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3092  }
3093 
3094  /// Implement MachineSchedStrategy interface.
3095  /// -----------------------------------------
3096 
3097  /// Callback to select the highest priority node from the ready Q.
3098  SUnit *pickNode(bool &IsTopNode) override {
3099  if (ReadyQ.empty()) return nullptr;
3100  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3101  SUnit *SU = ReadyQ.back();
3102  ReadyQ.pop_back();
3103  IsTopNode = false;
3104  DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3105  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3106  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3107  << DAG->getDFSResult()->getSubtreeLevel(
3108  DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3109  << "Scheduling " << *SU->getInstr());
3110  return SU;
3111  }
3112 
3113  /// \brief Scheduler callback to notify that a new subtree is scheduled.
3114  void scheduleTree(unsigned SubtreeID) override {
3115  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3116  }
3117 
3118  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3119  /// DFSResults, and resort the priority Q.
3120  void schedNode(SUnit *SU, bool IsTopNode) override {
3121  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3122  }
3123 
3124  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3125 
3126  void releaseBottomNode(SUnit *SU) override {
3127  ReadyQ.push_back(SU);
3128  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3129  }
3130 };
3131 } // namespace
3132 
3133 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3134  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
3135 }
3136 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3137  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
3138 }
3139 static MachineSchedRegistry ILPMaxRegistry(
3140  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3141 static MachineSchedRegistry ILPMinRegistry(
3142  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3143 
3144 //===----------------------------------------------------------------------===//
3145 // Machine Instruction Shuffler for Correctness Testing
3146 //===----------------------------------------------------------------------===//
3147 
3148 #ifndef NDEBUG
3149 namespace {
3150 /// Apply a less-than relation on the node order, which corresponds to the
3151 /// instruction order prior to scheduling. IsReverse implements greater-than.
3152 template<bool IsReverse>
3153 struct SUnitOrder {
3154  bool operator()(SUnit *A, SUnit *B) const {
3155  if (IsReverse)
3156  return A->NodeNum > B->NodeNum;
3157  else
3158  return A->NodeNum < B->NodeNum;
3159  }
3160 };
3161 
3162 /// Reorder instructions as much as possible.
3163 class InstructionShuffler : public MachineSchedStrategy {
3164  bool IsAlternating;
3165  bool IsTopDown;
3166 
3167  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3168  // gives nodes with a higher number higher priority causing the latest
3169  // instructions to be scheduled first.
3170  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
3171  TopQ;
3172  // When scheduling bottom-up, use greater-than as the queue priority.
3173  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
3174  BottomQ;
3175 public:
3176  InstructionShuffler(bool alternate, bool topdown)
3177  : IsAlternating(alternate), IsTopDown(topdown) {}
3178 
3179  void initialize(ScheduleDAGMI*) override {
3180  TopQ.clear();
3181  BottomQ.clear();
3182  }
3183 
3184  /// Implement MachineSchedStrategy interface.
3185  /// -----------------------------------------
3186 
3187  SUnit *pickNode(bool &IsTopNode) override {
3188  SUnit *SU;
3189  if (IsTopDown) {
3190  do {
3191  if (TopQ.empty()) return nullptr;
3192  SU = TopQ.top();
3193  TopQ.pop();
3194  } while (SU->isScheduled);
3195  IsTopNode = true;
3196  }
3197  else {
3198  do {
3199  if (BottomQ.empty()) return nullptr;
3200  SU = BottomQ.top();
3201  BottomQ.pop();
3202  } while (SU->isScheduled);
3203  IsTopNode = false;
3204  }
3205  if (IsAlternating)
3206  IsTopDown = !IsTopDown;
3207  return SU;
3208  }
3209 
3210  void schedNode(SUnit *SU, bool IsTopNode) override {}
3211 
3212  void releaseTopNode(SUnit *SU) override {
3213  TopQ.push(SU);
3214  }
3215  void releaseBottomNode(SUnit *SU) override {
3216  BottomQ.push(SU);
3217  }
3218 };
3219 } // namespace
3220 
3221 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3222  bool Alternate = !ForceTopDown && !ForceBottomUp;
3223  bool TopDown = !ForceBottomUp;
3224  assert((TopDown || !ForceTopDown) &&
3225  "-misched-topdown incompatible with -misched-bottomup");
3226  return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
3227 }
3228 static MachineSchedRegistry ShufflerRegistry(
3229  "shuffle", "Shuffle machine instructions alternating directions",
3230  createInstructionShuffler);
3231 #endif // !NDEBUG
3232 
3233 //===----------------------------------------------------------------------===//
3234 // GraphWriter support for ScheduleDAGMILive.
3235 //===----------------------------------------------------------------------===//
3236 
3237 #ifndef NDEBUG
3238 namespace llvm {
3239 
3240 template<> struct GraphTraits<
3241  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3242 
3243 template<>
3244 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3245 
3246  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3247 
3248  static std::string getGraphName(const ScheduleDAG *G) {
3249  return G->MF.getName();
3250  }
3251 
3252  static bool renderGraphFromBottomUp() {
3253  return true;
3254  }
3255 
3256  static bool isNodeHidden(const SUnit *Node) {
3257  return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
3258  }
3259 
3260  static bool hasNodeAddressLabel(const SUnit *Node,
3261  const ScheduleDAG *Graph) {
3262  return false;
3263  }
3264 
3265  /// If you want to override the dot attributes printed for a particular
3266  /// edge, override this method.
3267  static std::string getEdgeAttributes(const SUnit *Node,
3268  SUnitIterator EI,
3269  const ScheduleDAG *Graph) {
3270  if (EI.isArtificialDep())
3271  return "color=cyan,style=dashed";
3272  if (EI.isCtrlDep())
3273  return "color=blue,style=dashed";
3274  return "";
3275  }
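  // For illustration, an artificial edge rendered with the attributes above
  // would show up in the emitted dot along the lines of (node names invented
  // here):
  //
  //   NodeA -> NodeB [color=cyan,style=dashed];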
3276 
3277  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3278  std::string Str;
3279  raw_string_ostream SS(Str);
3280  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3281  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3282  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3283  SS << "SU:" << SU->NodeNum;
3284  if (DFS)
3285  SS << " I:" << DFS->getNumInstrs(SU);
3286  return SS.str();
3287  }
3288  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3289  return G->getGraphNodeLabel(SU);
3290  }
3291 
3292  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3293  std::string Str("shape=Mrecord");
3294  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3295  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3296  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3297  if (DFS) {
3298  Str += ",style=filled,fillcolor=\"#";
3299  Str += DOT::getColorString(DFS->getSubtreeID(N));
3300  Str += '"';
3301  }
3302  return Str;
3303  }
3304 };
3305 } // namespace llvm
3306 #endif // NDEBUG
3307 
3308 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3309 /// rendered using 'dot'.
3310 ///
3311 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3312 #ifndef NDEBUG
3313  ViewGraph(this, Name, false, Title);
3314 #else
3315  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3316  << "systems with Graphviz or gv!\n";
3317 #endif // NDEBUG
3318 }
3319 
3320 /// Out-of-line implementation with no arguments is handy for gdb.
3321 void ScheduleDAGMI::viewGraph() {
3322  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3323 }
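// A sketch of the intended gdb usage, assuming a debug build with Graphviz
// available and execution stopped inside a ScheduleDAGMI member function:
//
//   (gdb) call this->viewGraph()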