LLVM  10.0.0svn
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/MachineScheduler.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 cl::opt<bool> VerifyScheduling(
86  "verify-misched", cl::Hidden,
87  cl::desc("Verify machine instrs before and after machine scheduling"));
88 
89 } // end namespace llvm
90 
91 #ifndef NDEBUG
92 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
93  cl::desc("Pop up a window to show MISched dags after they are processed"));
94 
95 /// In some situations a few uninteresting nodes depend on nearly all other
96 /// nodes in the graph, provide a cutoff to hide them.
97 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
98  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
99 
100 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
101  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
102 
103 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
104  cl::desc("Only schedule this function"));
105 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
106  cl::desc("Only schedule this MBB#"));
107 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
108  cl::desc("Print schedule DAGs"));
109 #else
110 static const bool ViewMISchedDAGs = false;
111 static const bool PrintDAGs = false;
112 #endif // NDEBUG
113 
114 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
115 /// size of the ready lists.
116 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
117  cl::desc("Limit ready list to N instructions"), cl::init(256));
118 
119 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
120  cl::desc("Enable register pressure scheduling."), cl::init(true));
121 
122 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
123  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
124 
125 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
126  cl::desc("Enable memop clustering."),
127  cl::init(true));
128 
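// Illustrative usage sketch (not from this file): the cl::opt flags defined
// above are read when the machine scheduler runs inside llc. Assuming an input
// module test.ll, one plausible debugging invocation is:
//
//   llc -debug-only=machine-scheduler -misched-print-dags \
//       -misched-topdown -misched-cutoff=20 test.ll -o /dev/null
//
// The options declared under #ifndef NDEBUG above (for example
// -misched-print-dags, -view-misched-dags and -misched-cutoff) exist only in
// asserts builds; -misched-regpressure, -misched-cyclicpath and
// -misched-cluster are always available.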
129 // DAG subtrees must have at least this many nodes.
130 static const unsigned MinSubtreeSize = 8;
131 
132 // Pin the vtables to this file.
133 void MachineSchedStrategy::anchor() {}
134 
135 void ScheduleDAGMutation::anchor() {}
136 
137 //===----------------------------------------------------------------------===//
138 // Machine Instruction Scheduling Pass and Registry
139 //===----------------------------------------------------------------------===//
140 
141 MachineSchedContext::MachineSchedContext() {
142  RegClassInfo = new RegisterClassInfo();
143 }
144 
145 MachineSchedContext::~MachineSchedContext() {
146  delete RegClassInfo;
147 }
148 
149 namespace {
150 
151 /// Base class for a machine scheduler class that can run at any point.
152 class MachineSchedulerBase : public MachineSchedContext,
153  public MachineFunctionPass {
154 public:
155  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
156 
157  void print(raw_ostream &O, const Module* = nullptr) const override;
158 
159 protected:
160  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
161 };
162 
163 /// MachineScheduler runs after coalescing and before register allocation.
164 class MachineScheduler : public MachineSchedulerBase {
165 public:
166  MachineScheduler();
167 
168  void getAnalysisUsage(AnalysisUsage &AU) const override;
169 
170  bool runOnMachineFunction(MachineFunction&) override;
171 
172  static char ID; // Class identification, replacement for typeinfo
173 
174 protected:
175  ScheduleDAGInstrs *createMachineScheduler();
176 };
177 
178 /// PostMachineScheduler runs after register allocation, shortly before code emission.
179 class PostMachineScheduler : public MachineSchedulerBase {
180 public:
181  PostMachineScheduler();
182 
183  void getAnalysisUsage(AnalysisUsage &AU) const override;
184 
185  bool runOnMachineFunction(MachineFunction&) override;
186 
187  static char ID; // Class identification, replacement for typeinfo
188 
189 protected:
190  ScheduleDAGInstrs *createPostMachineScheduler();
191 };
192 
193 } // end anonymous namespace
194 
195 char MachineScheduler::ID = 0;
196 
197 char &llvm::MachineSchedulerID = MachineScheduler::ID;
198 
199 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
200  "Machine Instruction Scheduler", false, false)
206 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
207  "Machine Instruction Scheduler", false, false)
208 
209 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
210  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
211 }
212 
213 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
214  AU.setPreservesCFG();
219  AU.addRequired<SlotIndexes>();
224 }
225 
226 char PostMachineScheduler::ID = 0;
227 
228 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
229 
230 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
231  "PostRA Machine Instruction Scheduler", false, false)
232 
233 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
234  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
235 }
236 
237 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
238  AU.setPreservesCFG();
243 }
244 
247 
248 /// A dummy default scheduler factory indicates whether the scheduler
249 /// is overridden on the command line.
250 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
251  return nullptr;
252 }
253 
254 /// MachineSchedOpt allows command line selection of the scheduler.
255 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
256  RegisterPassParser<MachineSchedRegistry>>
257 MachineSchedOpt("misched",
258  cl::init(&useDefaultMachineSched), cl::Hidden,
259  cl::desc("Machine instruction scheduler to use"));
260 
261 static MachineSchedRegistry
262 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
263  useDefaultMachineSched);
264 
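// A minimal sketch (not in this file) of how a target or plugin makes its own
// strategy selectable under -misched=<name>: pair a factory of type
// MachineSchedRegistry::ScheduleDAGCtor with a registry entry. The names below
// are hypothetical; createGenericSchedLive is the real default factory defined
// later in this file.
//
//   static ScheduleDAGInstrs *createIllustrativeSched(MachineSchedContext *C) {
//     // Reuse the generic live-interval strategy; a real target would plug in
//     // its own MachineSchedStrategy here.
//     return createGenericSchedLive(C);
//   }
//   static MachineSchedRegistry
//   IllustrativeSchedRegistry("illustrative", "Illustrative scheduler entry.",
//                             createIllustrativeSched);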
265 static cl::opt<bool> EnableMachineSched(
266  "enable-misched",
267  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
268  cl::Hidden);
269 
270 static cl::opt<bool> EnablePostRAMachineSched(
271  "enable-post-misched",
272  cl::desc("Enable the post-ra machine instruction scheduling pass."),
273  cl::init(true), cl::Hidden);
274 
275 /// Decrement this iterator until reaching the top or a non-debug instr.
276 static MachineBasicBlock::const_iterator
277 priorNonDebug(MachineBasicBlock::const_iterator I,
278  MachineBasicBlock::const_iterator Beg) {
279  assert(I != Beg && "reached the top of the region, cannot decrement");
280  while (--I != Beg) {
281  if (!I->isDebugInstr())
282  break;
283  }
284  return I;
285 }
286 
287 /// Non-const version.
288 static MachineBasicBlock::iterator
289 priorNonDebug(MachineBasicBlock::iterator I,
290  MachineBasicBlock::const_iterator Beg) {
291  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
292  .getNonConstIterator();
293 }
294 
295 /// If this iterator is a debug value, increment until reaching the End or a
296 /// non-debug instruction.
297 static MachineBasicBlock::const_iterator
298 nextIfDebug(MachineBasicBlock::const_iterator I,
299  MachineBasicBlock::const_iterator End) {
300  for(; I != End; ++I) {
301  if (!I->isDebugInstr())
302  break;
303  }
304  return I;
305 }
306 
307 /// Non-const version.
308 static MachineBasicBlock::iterator
309 nextIfDebug(MachineBasicBlock::iterator I,
310  MachineBasicBlock::const_iterator End) {
311  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
312  .getNonConstIterator();
313 }
314 
315 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
316 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
317  // Select the scheduler, or set the default.
318  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
319  if (Ctor != useDefaultMachineSched)
320  return Ctor(this);
321 
322  // Get the default scheduler set by the target for this function.
323  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
324  if (Scheduler)
325  return Scheduler;
326 
327  // Default to GenericScheduler.
328  return createGenericSchedLive(this);
329 }
330 
331 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
332 /// the caller. We don't have a command line option to override the postRA
333 /// scheduler. The Target must configure it.
334 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
335  // Get the postRA scheduler set by the target for this function.
336  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
337  if (Scheduler)
338  return Scheduler;
339 
340  // Default to GenericScheduler.
341  return createGenericSchedPostRA(this);
342 }
343 
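// A hedged sketch (not from this file) of the target-side hook queried above.
// The class name MyTargetPassConfig and the strategy are hypothetical; in-tree
// targets override TargetPassConfig in the same way:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     // Drive ScheduleDAGMILive with a target-specific MachineSchedStrategy.
//     return new ScheduleDAGMILive(C, std::make_unique<MyTargetStrategy>(C));
//   }
//
// Returning nullptr (or not overriding the hook) falls through to the
// createGenericSchedLive default above.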
344 /// Top-level MachineScheduler pass driver.
345 ///
346 /// Visit blocks in function order. Divide each block into scheduling regions
347 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
348 /// consistent with the DAG builder, which traverses the interior of the
349 /// scheduling regions bottom-up.
350 ///
351 /// This design avoids exposing scheduling boundaries to the DAG builder,
352 /// simplifying the DAG builder's support for "special" target instructions.
353 /// At the same time the design allows target schedulers to operate across
354 /// scheduling boundaries, for example to bundle the boundary instructions
355 /// without reordering them. This creates complexity, because the target
356 /// scheduler must update the RegionBegin and RegionEnd positions cached by
357 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
358 /// design would be to split blocks at scheduling boundaries, but LLVM has a
359 /// general bias against block splitting purely for implementation simplicity.
360 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
361  if (skipFunction(mf.getFunction()))
362  return false;
363 
364  if (EnableMachineSched.getNumOccurrences()) {
365  if (!EnableMachineSched)
366  return false;
367  } else if (!mf.getSubtarget().enableMachineScheduler())
368  return false;
369 
370  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
371 
372  // Initialize the context of the pass.
373  MF = &mf;
374  MLI = &getAnalysis<MachineLoopInfo>();
375  MDT = &getAnalysis<MachineDominatorTree>();
376  PassConfig = &getAnalysis<TargetPassConfig>();
377  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
378 
379  LIS = &getAnalysis<LiveIntervals>();
380 
381  if (VerifyScheduling) {
382  LLVM_DEBUG(LIS->dump());
383  MF->verify(this, "Before machine scheduling.");
384  }
385  RegClassInfo->runOnMachineFunction(*MF);
386 
387  // Instantiate the selected scheduler for this target, function, and
388  // optimization level.
389  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
390  scheduleRegions(*Scheduler, false);
391 
392  LLVM_DEBUG(LIS->dump());
393  if (VerifyScheduling)
394  MF->verify(this, "After machine scheduling.");
395  return true;
396 }
397 
398 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
399  if (skipFunction(mf.getFunction()))
400  return false;
401 
402  if (EnablePostRAMachineSched.getNumOccurrences()) {
403  if (!EnablePostRAMachineSched)
404  return false;
405  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
406  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
407  return false;
408  }
409  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
410 
411  // Initialize the context of the pass.
412  MF = &mf;
413  MLI = &getAnalysis<MachineLoopInfo>();
414  PassConfig = &getAnalysis<TargetPassConfig>();
415 
416  if (VerifyScheduling)
417  MF->verify(this, "Before post machine scheduling.");
418 
419  // Instantiate the selected scheduler for this target, function, and
420  // optimization level.
421  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
422  scheduleRegions(*Scheduler, true);
423 
424  if (VerifyScheduling)
425  MF->verify(this, "After post machine scheduling.");
426  return true;
427 }
428 
429 /// Return true if the given instruction should not be included in a scheduling
430 /// region.
431 ///
432 /// MachineScheduler does not currently support scheduling across calls. To
433 /// handle calls, the DAG builder needs to be modified to create register
434 /// anti/output dependencies on the registers clobbered by the call's regmask
435 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
436 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
437 /// the boundary, but there would be no benefit to postRA scheduling across
438 /// calls this late anyway.
439 static bool isSchedBoundary(MachineInstr *MI,
440  MachineBasicBlock *MBB,
441  MachineFunction *MF,
442  const TargetInstrInfo *TII) {
443  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
444 }
445 
446 /// A region of an MBB for scheduling.
447 namespace {
448 struct SchedRegion {
449  /// RegionBegin is the first instruction in the scheduling region, and
450  /// RegionEnd is either MBB->end() or the scheduling boundary after the
451  /// last instruction in the scheduling region. These iterators cannot refer
452  /// to instructions outside of the identified scheduling region because
453  /// those may be reordered before scheduling this region.
454  MachineBasicBlock::iterator RegionBegin;
455  MachineBasicBlock::iterator RegionEnd;
456  unsigned NumRegionInstrs;
457 
458  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
459  unsigned N) :
460  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
461 };
462 } // end anonymous namespace
463 
464 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
465 
466 static void
467 getSchedRegions(MachineBasicBlock *MBB,
468  MBBRegionsVector &Regions,
469  bool RegionsTopDown) {
470  MachineFunction *MF = MBB->getParent();
471  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
472 
473  MachineBasicBlock::iterator I = nullptr;
474  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
475  RegionEnd != MBB->begin(); RegionEnd = I) {
476 
477  // Avoid decrementing RegionEnd for blocks with no terminator.
478  if (RegionEnd != MBB->end() ||
479  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
480  --RegionEnd;
481  }
482 
483  // The next region starts above the previous region. Look backward in the
484  // instruction stream until we find the nearest boundary.
485  unsigned NumRegionInstrs = 0;
486  I = RegionEnd;
487  for (;I != MBB->begin(); --I) {
488  MachineInstr &MI = *std::prev(I);
489  if (isSchedBoundary(&MI, &*MBB, MF, TII))
490  break;
491  if (!MI.isDebugInstr()) {
492  // MBB::size() uses instr_iterator to count. Here we need a bundle to
493  // count as a single instruction.
494  ++NumRegionInstrs;
495  }
496  }
497 
498  // It's possible we found a scheduling region that only has debug
499  // instructions. Don't bother scheduling these.
500  if (NumRegionInstrs != 0)
501  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
502  }
503 
504  if (RegionsTopDown)
505  std::reverse(Regions.begin(), Regions.end());
506 }
507 
508 /// Main driver for both MachineScheduler and PostMachineScheduler.
509 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
510  bool FixKillFlags) {
511  // Visit all machine basic blocks.
512  //
513  // TODO: Visit blocks in global postorder or postorder within the bottom-up
514  // loop tree. Then we can optionally compute global RegPressure.
515  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
516  MBB != MBBEnd; ++MBB) {
517 
518  Scheduler.startBlock(&*MBB);
519 
520 #ifndef NDEBUG
521  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
522  continue;
523  if (SchedOnlyBlock.getNumOccurrences()
524  && (int)SchedOnlyBlock != MBB->getNumber())
525  continue;
526 #endif
527 
528  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
529  // points to the scheduling boundary at the bottom of the region. The DAG
530  // does not include RegionEnd, but the region does (i.e. the next
531  // RegionEnd is above the previous RegionBegin). If the current block has
532  // no terminator then RegionEnd == MBB->end() for the bottom region.
533  //
534  // All the regions of MBB are first found and stored in MBBRegions, which
535  // are then processed top-down within the block if RegionsTopDown is true.
536  //
537  // The Scheduler may insert instructions during either schedule() or
538  // exitRegion(), even for empty regions. So the local iterators 'I' and
539  // 'RegionEnd' are invalid across these calls. Instructions must not be
540  // added to other regions than the current one without updating MBBRegions.
541 
542  MBBRegionsVector MBBRegions;
543  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
544  for (MBBRegionsVector::iterator R = MBBRegions.begin();
545  R != MBBRegions.end(); ++R) {
546  MachineBasicBlock::iterator I = R->RegionBegin;
547  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
548  unsigned NumRegionInstrs = R->NumRegionInstrs;
549 
550  // Notify the scheduler of the region, even if we may skip scheduling
551  // it. Perhaps it still needs to be bundled.
552  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
553 
554  // Skip empty scheduling regions (0 or 1 schedulable instructions).
555  if (I == RegionEnd || I == std::prev(RegionEnd)) {
556  // Close the current region. Bundle the terminator if needed.
557  // This invalidates 'RegionEnd' and 'I'.
558  Scheduler.exitRegion();
559  continue;
560  }
561  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
562  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
563  << " " << MBB->getName() << "\n From: " << *I
564  << " To: ";
565  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
566  else dbgs() << "End";
567  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
568  if (DumpCriticalPathLength) {
569  errs() << MF->getName();
570  errs() << ":%bb. " << MBB->getNumber();
571  errs() << " " << MBB->getName() << " \n";
572  }
573 
574  // Schedule a region: possibly reorder instructions.
575  // This invalidates the original region iterators.
576  Scheduler.schedule();
577 
578  // Close the current region.
579  Scheduler.exitRegion();
580  }
581  Scheduler.finishBlock();
582  // FIXME: Ideally, no further passes should rely on kill flags. However,
583  // thumb2 size reduction is currently an exception, so the PostMIScheduler
584  // needs to do this.
585  if (FixKillFlags)
586  Scheduler.fixupKills(*MBB);
587  }
588  Scheduler.finalizeSchedule();
589 }
590 
591 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
592  // unimplemented
593 }
594 
595 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
596 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
597  dbgs() << "Queue " << Name << ": ";
598  for (const SUnit *SU : Queue)
599  dbgs() << SU->NodeNum << " ";
600  dbgs() << "\n";
601 }
602 #endif
603 
604 //===----------------------------------------------------------------------===//
605 // ScheduleDAGMI - Basic machine instruction scheduling. This is
606 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
607 // virtual registers.
608 //===----------------------------------------------------------------------===//
609 
610 // Provide a vtable anchor.
611 ScheduleDAGMI::~ScheduleDAGMI() = default;
612 
613 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
614 /// NumPredsLeft reaches zero, release the successor node.
615 ///
616 /// FIXME: Adjust SuccSU height based on MinLatency.
617 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
618  SUnit *SuccSU = SuccEdge->getSUnit();
619 
620  if (SuccEdge->isWeak()) {
621  --SuccSU->WeakPredsLeft;
622  if (SuccEdge->isCluster())
623  NextClusterSucc = SuccSU;
624  return;
625  }
626 #ifndef NDEBUG
627  if (SuccSU->NumPredsLeft == 0) {
628  dbgs() << "*** Scheduling failed! ***\n";
629  dumpNode(*SuccSU);
630  dbgs() << " has been released too many times!\n";
631  llvm_unreachable(nullptr);
632  }
633 #endif
634  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
635  // CurrCycle may have advanced since then.
636  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
637  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
638 
639  --SuccSU->NumPredsLeft;
640  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
641  SchedImpl->releaseTopNode(SuccSU);
642 }
643 
644 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
645 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
646  for (SDep &Succ : SU->Succs)
647  releaseSucc(SU, &Succ);
648 }
649 
650 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
651 /// NumSuccsLeft reaches zero, release the predecessor node.
652 ///
653 /// FIXME: Adjust PredSU height based on MinLatency.
654 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
655  SUnit *PredSU = PredEdge->getSUnit();
656 
657  if (PredEdge->isWeak()) {
658  --PredSU->WeakSuccsLeft;
659  if (PredEdge->isCluster())
660  NextClusterPred = PredSU;
661  return;
662  }
663 #ifndef NDEBUG
664  if (PredSU->NumSuccsLeft == 0) {
665  dbgs() << "*** Scheduling failed! ***\n";
666  dumpNode(*PredSU);
667  dbgs() << " has been released too many times!\n";
668  llvm_unreachable(nullptr);
669  }
670 #endif
671  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
672  // CurrCycle may have advanced since then.
673  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
674  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
675 
676  --PredSU->NumSuccsLeft;
677  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
678  SchedImpl->releaseBottomNode(PredSU);
679 }
680 
681 /// releasePredecessors - Call releasePred on each of SU's predecessors.
682 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
683  for (SDep &Pred : SU->Preds)
684  releasePred(SU, &Pred);
685 }
686 
687 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
688  ScheduleDAGInstrs::startBlock(bb);
689  SchedImpl->enterMBB(bb);
690 }
691 
692 void ScheduleDAGMI::finishBlock() {
693  SchedImpl->leaveMBB();
694  ScheduleDAGInstrs::finishBlock();
695 }
696 
697 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
698 /// crossing a scheduling boundary. [begin, end) includes all instructions in
699 /// the region, including the boundary itself and single-instruction regions
700 /// that don't get scheduled.
701 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
702  MachineBasicBlock::iterator begin,
703  MachineBasicBlock::iterator end,
704  unsigned regioninstrs)
705 {
706  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
707 
708  SchedImpl->initPolicy(begin, end, regioninstrs);
709 }
710 
711 /// This is normally called from the main scheduler loop but may also be invoked
712 /// by the scheduling strategy to perform additional code motion.
713 void ScheduleDAGMI::moveInstruction(
714  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
715  // Advance RegionBegin if the first instruction moves down.
716  if (&*RegionBegin == MI)
717  ++RegionBegin;
718 
719  // Update the instruction stream.
720  BB->splice(InsertPos, BB, MI);
721 
722  // Update LiveIntervals
723  if (LIS)
724  LIS->handleMove(*MI, /*UpdateFlags=*/true);
725 
726  // Recede RegionBegin if an instruction moves above the first.
727  if (RegionBegin == InsertPos)
728  RegionBegin = MI;
729 }
730 
731 bool ScheduleDAGMI::checkSchedLimit() {
732 #ifndef NDEBUG
733  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
734  CurrentTop = CurrentBottom;
735  return false;
736  }
737  ++NumInstrsScheduled;
738 #endif
739  return true;
740 }
741 
742 /// Per-region scheduling driver, called back from
743 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
744 /// does not consider liveness or register pressure. It is useful for PostRA
745 /// scheduling and potentially other custom schedulers.
746 void ScheduleDAGMI::schedule() {
747  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
748  LLVM_DEBUG(SchedImpl->dumpPolicy());
749 
750  // Build the DAG.
751  buildSchedGraph(AA);
752 
753  postprocessDAG();
754 
755  SmallVector<SUnit*, 8> TopRoots, BotRoots;
756  findRootsAndBiasEdges(TopRoots, BotRoots);
757 
758  LLVM_DEBUG(dump());
759  if (PrintDAGs) dump();
760  if (ViewMISchedDAGs) viewGraph();
761 
762  // Initialize the strategy before modifying the DAG.
763  // This may initialize a DFSResult to be used for queue priority.
764  SchedImpl->initialize(this);
765 
766  // Initialize ready queues now that the DAG and priority data are finalized.
767  initQueues(TopRoots, BotRoots);
768 
769  bool IsTopNode = false;
770  while (true) {
771  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
772  SUnit *SU = SchedImpl->pickNode(IsTopNode);
773  if (!SU) break;
774 
775  assert(!SU->isScheduled && "Node already scheduled");
776  if (!checkSchedLimit())
777  break;
778 
779  MachineInstr *MI = SU->getInstr();
780  if (IsTopNode) {
781  assert(SU->isTopReady() && "node still has unscheduled dependencies");
782  if (&*CurrentTop == MI)
783  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
784  else
785  moveInstruction(MI, CurrentTop);
786  } else {
787  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
788  MachineBasicBlock::iterator priorII =
789  priorNonDebug(CurrentBottom, CurrentTop);
790  if (&*priorII == MI)
791  CurrentBottom = priorII;
792  else {
793  if (&*CurrentTop == MI)
794  CurrentTop = nextIfDebug(++CurrentTop, priorII);
795  moveInstruction(MI, CurrentBottom);
796  CurrentBottom = MI;
797  }
798  }
799  // Notify the scheduling strategy before updating the DAG.
800  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
801  // runs, it can then use the accurate ReadyCycle time to determine whether
802  // newly released nodes can move to the readyQ.
803  SchedImpl->schedNode(SU, IsTopNode);
804 
805  updateQueues(SU, IsTopNode);
806  }
807  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
808 
809  placeDebugValues();
810 
811  LLVM_DEBUG({
812  dbgs() << "*** Final schedule for "
813  << printMBBReference(*begin()->getParent()) << " ***\n";
814  dumpSchedule();
815  dbgs() << '\n';
816  });
817 }
818 
819 /// Apply each ScheduleDAGMutation step in order.
820 void ScheduleDAGMI::postprocessDAG() {
821  for (auto &m : Mutations)
822  m->apply(this);
823 }
824 
825 void ScheduleDAGMI::
826 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
827  SmallVectorImpl<SUnit*> &BotRoots) {
828  for (SUnit &SU : SUnits) {
829  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
830 
831  // Order predecessors so DFSResult follows the critical path.
832  SU.biasCriticalPath();
833 
834  // A SUnit is ready to top schedule if it has no predecessors.
835  if (!SU.NumPredsLeft)
836  TopRoots.push_back(&SU);
837  // A SUnit is ready to bottom schedule if it has no successors.
838  if (!SU.NumSuccsLeft)
839  BotRoots.push_back(&SU);
840  }
841  ExitSU.biasCriticalPath();
842 }
843 
844 /// Identify DAG roots and setup scheduler queues.
845 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
846  ArrayRef<SUnit*> BotRoots) {
847  NextClusterSucc = nullptr;
848  NextClusterPred = nullptr;
849 
850  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
851  //
852  // Nodes with unreleased weak edges can still be roots.
853  // Release top roots in forward order.
854  for (SUnit *SU : TopRoots)
855  SchedImpl->releaseTopNode(SU);
856 
857  // Release bottom roots in reverse order so the higher priority nodes appear
858  // first. This is more natural and slightly more efficient.
859  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
860  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
861  SchedImpl->releaseBottomNode(*I);
862  }
863 
864  releaseSuccessors(&EntrySU);
865  releasePredecessors(&ExitSU);
866 
867  SchedImpl->registerRoots();
868 
869  // Advance past initial DebugValues.
870  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
871  CurrentBottom = RegionEnd;
872 }
873 
874 /// Update scheduler queues after scheduling an instruction.
875 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
876  // Release dependent instructions for scheduling.
877  if (IsTopNode)
878  releaseSuccessors(SU);
879  else
880  releasePredecessors(SU);
881 
882  SU->isScheduled = true;
883 }
884 
885 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
886 void ScheduleDAGMI::placeDebugValues() {
887  // If first instruction was a DBG_VALUE then put it back.
888  if (FirstDbgValue) {
889  BB->splice(RegionBegin, BB, FirstDbgValue);
890  RegionBegin = FirstDbgValue;
891  }
892 
893  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
894  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
895  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
896  MachineInstr *DbgValue = P.first;
897  MachineBasicBlock::iterator OrigPrevMI = P.second;
898  if (&*RegionBegin == DbgValue)
899  ++RegionBegin;
900  BB->splice(++OrigPrevMI, BB, DbgValue);
901  if (OrigPrevMI == std::prev(RegionEnd))
902  RegionEnd = DbgValue;
903  }
904  DbgValues.clear();
905  FirstDbgValue = nullptr;
906 }
907 
908 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
909 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
910  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
911  if (SUnit *SU = getSUnit(&(*MI)))
912  dumpNode(*SU);
913  else
914  dbgs() << "Missing SUnit\n";
915  }
916 }
917 #endif
918 
919 //===----------------------------------------------------------------------===//
920 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
921 // preservation.
922 //===----------------------------------------------------------------------===//
923 
924 ScheduleDAGMILive::~ScheduleDAGMILive() {
925  delete DFSResult;
926 }
927 
928 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
929  const MachineInstr &MI = *SU.getInstr();
930  for (const MachineOperand &MO : MI.operands()) {
931  if (!MO.isReg())
932  continue;
933  if (!MO.readsReg())
934  continue;
935  if (TrackLaneMasks && !MO.isUse())
936  continue;
937 
938  Register Reg = MO.getReg();
939  if (!Register::isVirtualRegister(Reg))
940  continue;
941 
942  // Ignore re-defs.
943  if (TrackLaneMasks) {
944  bool FoundDef = false;
945  for (const MachineOperand &MO2 : MI.operands()) {
946  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
947  FoundDef = true;
948  break;
949  }
950  }
951  if (FoundDef)
952  continue;
953  }
954 
955  // Record this local VReg use.
956  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
957  for (; UI != VRegUses.end(); ++UI) {
958  if (UI->SU == &SU)
959  break;
960  }
961  if (UI == VRegUses.end())
962  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
963  }
964 }
965 
966 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
967 /// crossing a scheduling boundary. [begin, end) includes all instructions in
968 /// the region, including the boundary itself and single-instruction regions
969 /// that don't get scheduled.
970 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
971  MachineBasicBlock::iterator begin,
972  MachineBasicBlock::iterator end,
973  unsigned regioninstrs)
974 {
975  // ScheduleDAGMI initializes SchedImpl's per-region policy.
976  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
977 
978  // For convenience remember the end of the liveness region.
979  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
980 
981  SUPressureDiffs.clear();
982 
983  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
984  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
985 
986  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
987  "ShouldTrackLaneMasks requires ShouldTrackPressure");
988 }
989 
990 // Setup the register pressure trackers for the top scheduled and bottom
991 // scheduled regions.
992 void ScheduleDAGMILive::initRegPressure() {
993  VRegUses.clear();
994  VRegUses.setUniverse(MRI.getNumVirtRegs());
995  for (SUnit &SU : SUnits)
996  collectVRegUses(SU);
997 
998  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
999  ShouldTrackLaneMasks, false);
1000  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1001  ShouldTrackLaneMasks, false);
1002 
1003  // Close the RPTracker to finalize live ins.
1004  RPTracker.closeRegion();
1005 
1006  LLVM_DEBUG(RPTracker.dump());
1007 
1008  // Initialize the live ins and live outs.
1009  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1010  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1011 
1012  // Close one end of the tracker so we can call
1013  // getMaxUpward/DownwardPressureDelta before advancing across any
1014  // instructions. This converts currently live regs into live ins/outs.
1015  TopRPTracker.closeTop();
1016  BotRPTracker.closeBottom();
1017 
1018  BotRPTracker.initLiveThru(RPTracker);
1019  if (!BotRPTracker.getLiveThru().empty()) {
1020  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1021  LLVM_DEBUG(dbgs() << "Live Thru: ";
1022  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1023  };
1024 
1025  // For each live out vreg reduce the pressure change associated with other
1026  // uses of the same vreg below the live-out reaching def.
1027  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1028 
1029  // Account for liveness generated by the region boundary.
1030  if (LiveRegionEnd != RegionEnd) {
1031  SmallVector<RegisterMaskPair, 8> LiveUses;
1032  BotRPTracker.recede(&LiveUses);
1033  updatePressureDiffs(LiveUses);
1034  }
1035 
1036  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1037  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1038  dbgs() << "Bottom Pressure:\n";
1039  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1040 
1041  assert((BotRPTracker.getPos() == RegionEnd ||
1042  (RegionEnd->isDebugInstr() &&
1043  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1044  "Can't find the region bottom");
1045 
1046  // Cache the list of excess pressure sets in this region. This will also track
1047  // the max pressure in the scheduled code for these sets.
1048  RegionCriticalPSets.clear();
1049  const std::vector<unsigned> &RegionPressure =
1050  RPTracker.getPressure().MaxSetPressure;
1051  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1052  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1053  if (RegionPressure[i] > Limit) {
1054  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1055  << " Actual " << RegionPressure[i] << "\n");
1056  RegionCriticalPSets.push_back(PressureChange(i));
1057  }
1058  }
1059  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1060  for (const PressureChange &RCPS
1061  : RegionCriticalPSets) dbgs()
1062  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1063  dbgs() << "\n");
1064 }
1065 
1067 void ScheduleDAGMILive::updateScheduledPressure(const SUnit *SU,
1068  const std::vector<unsigned> &NewMaxPressure) {
1069  const PressureDiff &PDiff = getPressureDiff(SU);
1070  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1071  for (const PressureChange &PC : PDiff) {
1072  if (!PC.isValid())
1073  break;
1074  unsigned ID = PC.getPSet();
1075  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1076  ++CritIdx;
1077  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1078  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1079  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1080  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1081  }
1082  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1083  if (NewMaxPressure[ID] >= Limit - 2) {
1084  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1085  << NewMaxPressure[ID]
1086  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1087  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1088  << " livethru)\n");
1089  }
1090  }
1091 }
1092 
1093 /// Update the PressureDiff array for liveness after scheduling this
1094 /// instruction.
1095 void ScheduleDAGMILive::updatePressureDiffs(
1096  ArrayRef<RegisterMaskPair> LiveUses) {
1097  for (const RegisterMaskPair &P : LiveUses) {
1098  unsigned Reg = P.RegUnit;
1099  /// FIXME: Currently assuming single-use physregs.
1100  if (!Register::isVirtualRegister(Reg))
1101  continue;
1102 
1103  if (ShouldTrackLaneMasks) {
1104  // If the register has just become live then other uses won't change
1105  // this fact anymore => decrement pressure.
1106  // If the register has just become dead then other uses make it come
1107  // back to life => increment pressure.
1108  bool Decrement = P.LaneMask.any();
1109 
1110  for (const VReg2SUnit &V2SU
1111  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1112  SUnit &SU = *V2SU.SU;
1113  if (SU.isScheduled || &SU == &ExitSU)
1114  continue;
1115 
1116  PressureDiff &PDiff = getPressureDiff(&SU);
1117  PDiff.addPressureChange(Reg, Decrement, &MRI);
1118  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1119  << printReg(Reg, TRI) << ':'
1120  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1121  dbgs() << " to "; PDiff.dump(*TRI););
1122  }
1123  } else {
1124  assert(P.LaneMask.any());
1125  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1126  // This may be called before CurrentBottom has been initialized. However,
1127  // BotRPTracker must have a valid position. We want the value live into the
1128  // instruction or live out of the block, so ask for the previous
1129  // instruction's live-out.
1130  const LiveInterval &LI = LIS->getInterval(Reg);
1131  VNInfo *VNI;
1132  MachineBasicBlock::const_iterator I =
1133  nextIfDebug(BotRPTracker.getPos(), BB->end());
1134  if (I == BB->end())
1135  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1136  else {
1137  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1138  VNI = LRQ.valueIn();
1139  }
1140  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1141  assert(VNI && "No live value at use.");
1142  for (const VReg2SUnit &V2SU
1143  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1144  SUnit *SU = V2SU.SU;
1145  // If this use comes before the reaching def, it cannot be a last use,
1146  // so decrease its pressure change.
1147  if (!SU->isScheduled && SU != &ExitSU) {
1148  LiveQueryResult LRQ =
1149  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1150  if (LRQ.valueIn() == VNI) {
1151  PressureDiff &PDiff = getPressureDiff(SU);
1152  PDiff.addPressureChange(Reg, true, &MRI);
1153  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1154  << *SU->getInstr();
1155  dbgs() << " to "; PDiff.dump(*TRI););
1156  }
1157  }
1158  }
1159  }
1160  }
1161 }
1162 
1163 void ScheduleDAGMILive::dump() const {
1164 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1165  if (EntrySU.getInstr() != nullptr)
1166  dumpNodeAll(EntrySU);
1167  for (const SUnit &SU : SUnits) {
1168  dumpNodeAll(SU);
1169  if (ShouldTrackPressure) {
1170  dbgs() << " Pressure Diff : ";
1171  getPressureDiff(&SU).dump(*TRI);
1172  }
1173  dbgs() << " Single Issue : ";
1174  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1175  SchedModel.mustEndGroup(SU.getInstr()))
1176  dbgs() << "true;";
1177  else
1178  dbgs() << "false;";
1179  dbgs() << '\n';
1180  }
1181  if (ExitSU.getInstr() != nullptr)
1182  dumpNodeAll(ExitSU);
1183 #endif
1184 }
1185 
1186 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1187 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1188 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1189 ///
1190 /// This is a skeletal driver, with all the functionality pushed into helpers,
1191 /// so that it can be easily extended by experimental schedulers. Generally,
1192 /// implementing MachineSchedStrategy should be sufficient to implement a new
1193 /// scheduling algorithm. However, if a scheduler further subclasses
1194 /// ScheduleDAGMILive then it will want to override this virtual method in order
1195 /// to update any specialized state.
1196 void ScheduleDAGMILive::schedule() {
1197  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1198  LLVM_DEBUG(SchedImpl->dumpPolicy());
1199  buildDAGWithRegPressure();
1200 
1201  postprocessDAG();
1202 
1203  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1204  findRootsAndBiasEdges(TopRoots, BotRoots);
1205 
1206  // Initialize the strategy before modifying the DAG.
1207  // This may initialize a DFSResult to be used for queue priority.
1208  SchedImpl->initialize(this);
1209 
1210  LLVM_DEBUG(dump());
1211  if (PrintDAGs) dump();
1212  if (ViewMISchedDAGs) viewGraph();
1213 
1214  // Initialize ready queues now that the DAG and priority data are finalized.
1215  initQueues(TopRoots, BotRoots);
1216 
1217  bool IsTopNode = false;
1218  while (true) {
1219  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1220  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1221  if (!SU) break;
1222 
1223  assert(!SU->isScheduled && "Node already scheduled");
1224  if (!checkSchedLimit())
1225  break;
1226 
1227  scheduleMI(SU, IsTopNode);
1228 
1229  if (DFSResult) {
1230  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1231  if (!ScheduledTrees.test(SubtreeID)) {
1232  ScheduledTrees.set(SubtreeID);
1233  DFSResult->scheduleTree(SubtreeID);
1234  SchedImpl->scheduleTree(SubtreeID);
1235  }
1236  }
1237 
1238  // Notify the scheduling strategy after updating the DAG.
1239  SchedImpl->schedNode(SU, IsTopNode);
1240 
1241  updateQueues(SU, IsTopNode);
1242  }
1243  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1244 
1245  placeDebugValues();
1246 
1247  LLVM_DEBUG({
1248  dbgs() << "*** Final schedule for "
1249  << printMBBReference(*begin()->getParent()) << " ***\n";
1250  dumpSchedule();
1251  dbgs() << '\n';
1252  });
1253 }
1254 
1255 /// Build the DAG and setup three register pressure trackers.
1256 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1257  if (!ShouldTrackPressure) {
1258  RPTracker.reset();
1259  RegionCriticalPSets.clear();
1260  buildSchedGraph(AA);
1261  return;
1262  }
1263 
1264  // Initialize the register pressure tracker used by buildSchedGraph.
1265  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1266  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1267 
1268  // Account for liveness generated by the region boundary.
1269  if (LiveRegionEnd != RegionEnd)
1270  RPTracker.recede();
1271 
1272  // Build the DAG, and compute current register pressure.
1273  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1274 
1275  // Initialize top/bottom trackers after computing region pressure.
1276  initRegPressure();
1277 }
1278 
1279 void ScheduleDAGMILive::computeDFSResult() {
1280  if (!DFSResult)
1281  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1282  DFSResult->clear();
1283  ScheduledTrees.clear();
1284  DFSResult->resize(SUnits.size());
1285  DFSResult->compute(SUnits);
1286  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1287 }
1288 
1289 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1290 /// only provides the critical path for single block loops. To handle loops that
1291 /// span blocks, we could use the vreg path latencies provided by
1292 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1293 /// available for use in the scheduler.
1294 ///
1295 /// The cyclic path estimation identifies a def-use pair that crosses the back
1296 /// edge and considers the depth and height of the nodes. For example, consider
1297 /// the following instruction sequence where each instruction has unit latency
1298 /// and defines an eponymous virtual register:
1299 ///
1300 /// a->b(a,c)->c(b)->d(c)->exit
1301 ///
1302 /// The cyclic critical path is two cycles: b->c->b
1303 /// The acyclic critical path is four cycles: a->b->c->d->exit
1304 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1305 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1306 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1307 /// LiveInDepth = depth(b) = len(a->b) = 1
1308 ///
1309 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1310 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1311 /// CyclicCriticalPath = min(2, 2) = 2
1312 ///
1313 /// This could be relevant to PostRA scheduling, but is currently implemented
1314 /// assuming LiveIntervals.
1315 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1316  // This only applies to single-block loops.
1317  if (!BB->isSuccessor(BB))
1318  return 0;
1319 
1320  unsigned MaxCyclicLatency = 0;
1321  // Visit each live out vreg def to find def/use pairs that cross iterations.
1322  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1323  unsigned Reg = P.RegUnit;
1324  if (!Register::isVirtualRegister(Reg))
1325  continue;
1326  const LiveInterval &LI = LIS->getInterval(Reg);
1327  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1328  if (!DefVNI)
1329  continue;
1330 
1331  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1332  const SUnit *DefSU = getSUnit(DefMI);
1333  if (!DefSU)
1334  continue;
1335 
1336  unsigned LiveOutHeight = DefSU->getHeight();
1337  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1338  // Visit all local users of the vreg def.
1339  for (const VReg2SUnit &V2SU
1340  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1341  SUnit *SU = V2SU.SU;
1342  if (SU == &ExitSU)
1343  continue;
1344 
1345  // Only consider uses of the phi.
1346  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1347  if (!LRQ.valueIn()->isPHIDef())
1348  continue;
1349 
1350  // Assume that a path spanning two iterations is a cycle, which could
1351  // overestimate in strange cases. This allows cyclic latency to be
1352  // estimated as the minimum slack of the vreg's depth or height.
1353  unsigned CyclicLatency = 0;
1354  if (LiveOutDepth > SU->getDepth())
1355  CyclicLatency = LiveOutDepth - SU->getDepth();
1356 
1357  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1358  if (LiveInHeight > LiveOutHeight) {
1359  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1360  CyclicLatency = LiveInHeight - LiveOutHeight;
1361  } else
1362  CyclicLatency = 0;
1363 
1364  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1365  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1366  if (CyclicLatency > MaxCyclicLatency)
1367  MaxCyclicLatency = CyclicLatency;
1368  }
1369  }
1370  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1371  return MaxCyclicLatency;
1372 }
1373 
1374 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1375 /// the Top RP tracker in case the region beginning has changed.
1376 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1377  ArrayRef<SUnit*> BotRoots) {
1378  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1379  if (ShouldTrackPressure) {
1380  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1381  TopRPTracker.setPos(CurrentTop);
1382  }
1383 }
1384 
1385 /// Move an instruction and update register pressure.
1386 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1387  // Move the instruction to its new location in the instruction stream.
1388  MachineInstr *MI = SU->getInstr();
1389 
1390  if (IsTopNode) {
1391  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1392  if (&*CurrentTop == MI)
1393  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1394  else {
1395  moveInstruction(MI, CurrentTop);
1396  TopRPTracker.setPos(MI);
1397  }
1398 
1399  if (ShouldTrackPressure) {
1400  // Update top scheduled pressure.
1401  RegisterOperands RegOpers;
1402  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1403  if (ShouldTrackLaneMasks) {
1404  // Adjust liveness and add missing dead+read-undef flags.
1405  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1406  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1407  } else {
1408  // Adjust for missing dead-def flags.
1409  RegOpers.detectDeadDefs(*MI, *LIS);
1410  }
1411 
1412  TopRPTracker.advance(RegOpers);
1413  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1414  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1415  TopRPTracker.getRegSetPressureAtPos(), TRI););
1416 
1417  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1418  }
1419  } else {
1420  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1421  MachineBasicBlock::iterator priorII =
1422  priorNonDebug(CurrentBottom, CurrentTop);
1423  if (&*priorII == MI)
1424  CurrentBottom = priorII;
1425  else {
1426  if (&*CurrentTop == MI) {
1427  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1428  TopRPTracker.setPos(CurrentTop);
1429  }
1430  moveInstruction(MI, CurrentBottom);
1431  CurrentBottom = MI;
1432  BotRPTracker.setPos(CurrentBottom);
1433  }
1434  if (ShouldTrackPressure) {
1435  RegisterOperands RegOpers;
1436  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1437  if (ShouldTrackLaneMasks) {
1438  // Adjust liveness and add missing dead+read-undef flags.
1439  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1440  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1441  } else {
1442  // Adjust for missing dead-def flags.
1443  RegOpers.detectDeadDefs(*MI, *LIS);
1444  }
1445 
1446  if (BotRPTracker.getPos() != CurrentBottom)
1447  BotRPTracker.recedeSkipDebugValues();
1448  SmallVector<RegisterMaskPair, 8> LiveUses;
1449  BotRPTracker.recede(RegOpers, &LiveUses);
1450  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1451  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1452  BotRPTracker.getRegSetPressureAtPos(), TRI););
1453 
1454  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1455  updatePressureDiffs(LiveUses);
1456  }
1457  }
1458 }
1459 
1460 //===----------------------------------------------------------------------===//
1461 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1462 //===----------------------------------------------------------------------===//
1463 
1464 namespace {
1465 
1466 /// Post-process the DAG to create cluster edges between neighboring
1467 /// loads or between neighboring stores.
1468 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1469  struct MemOpInfo {
1470  SUnit *SU;
1471  const MachineOperand *BaseOp;
1472  int64_t Offset;
1473 
1474  MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1475  : SU(su), BaseOp(Op), Offset(ofs) {}
1476 
1477  bool operator<(const MemOpInfo &RHS) const {
1478  if (BaseOp->getType() != RHS.BaseOp->getType())
1479  return BaseOp->getType() < RHS.BaseOp->getType();
1480 
1481  if (BaseOp->isReg())
1482  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1483  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1484  RHS.SU->NodeNum);
1485  if (BaseOp->isFI()) {
1486  const MachineFunction &MF =
1487  *BaseOp->getParent()->getParent()->getParent();
1488  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1489  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1490  TargetFrameLowering::StackGrowsDown;
1491  // Can't use tuple comparison here since we might need to use a
1492  // different order when the stack grows down.
1493  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1494  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1495  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1496 
1497  if (Offset != RHS.Offset)
1498  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1499 
1500  return SU->NodeNum < RHS.SU->NodeNum;
1501  }
1502 
1503  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1504  "index bases.");
1505  }
1506  };
1507 
1508  const TargetInstrInfo *TII;
1509  const TargetRegisterInfo *TRI;
1510  bool IsLoad;
1511 
1512 public:
1513  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1514  const TargetRegisterInfo *tri, bool IsLoad)
1515  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1516 
1517  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1518 
1519 protected:
1520  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1521 };
1522 
1523 class StoreClusterMutation : public BaseMemOpClusterMutation {
1524 public:
1525  StoreClusterMutation(const TargetInstrInfo *tii,
1526  const TargetRegisterInfo *tri)
1527  : BaseMemOpClusterMutation(tii, tri, false) {}
1528 };
1529 
1530 class LoadClusterMutation : public BaseMemOpClusterMutation {
1531 public:
1532  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1533  : BaseMemOpClusterMutation(tii, tri, true) {}
1534 };
1535 
1536 } // end anonymous namespace
1537 
1538 namespace llvm {
1539 
1540 std::unique_ptr<ScheduleDAGMutation>
1541 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1542  const TargetRegisterInfo *TRI) {
1543  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1544  : nullptr;
1545 }
1546 
1547 std::unique_ptr<ScheduleDAGMutation>
1548 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1549  const TargetRegisterInfo *TRI) {
1550  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1551  : nullptr;
1552 }
1553 
1554 } // end namespace llvm
1555 
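// Illustrative only: these factories are normally consumed from a target's
// createMachineScheduler hook, which attaches the returned mutations to the
// DAG. The surrounding context is assumed; the calls themselves are the APIs
// defined above. Both factories return nullptr when -misched-cluster is off,
// and ScheduleDAGMI::addMutation simply ignores a null mutation.
//
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));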
1556 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1557  ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1558  SmallVector<MemOpInfo, 32> MemOpRecords;
1559  for (SUnit *SU : MemOps) {
1560  const MachineOperand *BaseOp;
1561  int64_t Offset;
1562  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1563  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1564  }
1565  if (MemOpRecords.size() < 2)
1566  return;
1567 
1568  llvm::sort(MemOpRecords);
1569  unsigned ClusterLength = 1;
1570  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1571  SUnit *SUa = MemOpRecords[Idx].SU;
1572  SUnit *SUb = MemOpRecords[Idx+1].SU;
1573  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1574  *MemOpRecords[Idx + 1].BaseOp,
1575  ClusterLength) &&
1576  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1577  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1578  << SUb->NodeNum << ")\n");
1579  // Copy successor edges from SUa to SUb. Interleaving computation
1580  // dependent on SUa can prevent load combining due to register reuse.
1581  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1582  // loads should have effectively the same inputs.
1583  for (const SDep &Succ : SUa->Succs) {
1584  if (Succ.getSUnit() == SUb)
1585  continue;
1586  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1587  << ")\n");
1588  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1589  }
1590  ++ClusterLength;
1591  } else
1592  ClusterLength = 1;
1593  }
1594 }
1595 
1596 /// Callback from DAG postProcessing to create cluster edges for loads.
1597 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1598  // Map DAG NodeNum to store chain ID.
1599  DenseMap<unsigned, unsigned> StoreChainIDs;
1600  // Map each store chain to a set of dependent MemOps.
1601  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1602  for (SUnit &SU : DAG->SUnits) {
1603  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1604  (!IsLoad && !SU.getInstr()->mayStore()))
1605  continue;
1606 
1607  unsigned ChainPredID = DAG->SUnits.size();
1608  for (const SDep &Pred : SU.Preds) {
1609  if (Pred.isCtrl()) {
1610  ChainPredID = Pred.getSUnit()->NodeNum;
1611  break;
1612  }
1613  }
1614  // Check if this chain-like pred has been seen
1615  // before. ChainPredID==MaxNodeID at the top of the schedule.
1616  unsigned NumChains = StoreChainDependents.size();
1617  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1618  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1619  if (Result.second)
1620  StoreChainDependents.resize(NumChains + 1);
1621  StoreChainDependents[Result.first->second].push_back(&SU);
1622  }
1623 
1624  // Iterate over the store chains.
1625  for (auto &SCD : StoreChainDependents)
1626  clusterNeighboringMemOps(SCD, DAG);
1627 }
1628 
1629 //===----------------------------------------------------------------------===//
1630 // CopyConstrain - DAG post-processing to encourage copy elimination.
1631 //===----------------------------------------------------------------------===//
1632 
1633 namespace {
1634 
1635 /// Post-process the DAG to create weak edges from all uses of a copy to
1636 /// the one use that defines the copy's source vreg, most likely an induction
1637 /// variable increment.
1638 class CopyConstrain : public ScheduleDAGMutation {
1639  // Transient state.
1640  SlotIndex RegionBeginIdx;
1641 
1642  // RegionEndIdx is the slot index of the last non-debug instruction in the
1643  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1644  SlotIndex RegionEndIdx;
1645 
1646 public:
1647  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1648 
1649  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1650 
1651 protected:
1652  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1653 };
1654 
1655 } // end anonymous namespace
1656 
1657 namespace llvm {
1658 
1659 std::unique_ptr<ScheduleDAGMutation>
1660 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1661  const TargetRegisterInfo *TRI) {
1662  return std::make_unique<CopyConstrain>(TII, TRI);
1663 }
1664 
1665 } // end namespace llvm
1666 
1667 /// constrainLocalCopy handles two possibilities:
1668 /// 1) Local src:
1669 /// I0: = dst
1670 /// I1: src = ...
1671 /// I2: = dst
1672 /// I3: dst = src (copy)
1673 /// (create pred->succ edges I0->I1, I2->I1)
1674 ///
1675 /// 2) Local copy:
1676 /// I0: dst = src (copy)
1677 /// I1: = dst
1678 /// I2: src = ...
1679 /// I3: = dst
1680 /// (create pred->succ edges I1->I2, I3->I2)
1681 ///
1682 /// Although the MachineScheduler is currently constrained to single blocks,
1683 /// this algorithm should handle extended blocks. An EBB is a set of
1684 /// contiguously numbered blocks such that the previous block in the EBB is
1685 /// always the single predecessor.
1686 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1687  LiveIntervals *LIS = DAG->getLIS();
1688  MachineInstr *Copy = CopySU->getInstr();
1689 
1690  // Check for pure vreg copies.
1691  const MachineOperand &SrcOp = Copy->getOperand(1);
1692  Register SrcReg = SrcOp.getReg();
1693  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1694  return;
1695 
1696  const MachineOperand &DstOp = Copy->getOperand(0);
1697  Register DstReg = DstOp.getReg();
1698  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1699  return;
1700 
1701  // Check if either the dest or source is local. If it's live across a back
1702  // edge, it's not local. Note that if both vregs are live across the back
1703  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1704  // If both the copy's source and dest are local live intervals, then we
1705  // should treat the dest as the global for the purpose of adding
1706  // constraints. This adds edges from source's other uses to the copy.
1707  unsigned LocalReg = SrcReg;
1708  unsigned GlobalReg = DstReg;
1709  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1710  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1711  LocalReg = DstReg;
1712  GlobalReg = SrcReg;
1713  LocalLI = &LIS->getInterval(LocalReg);
1714  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1715  return;
1716  }
1717  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1718 
1719  // Find the global segment after the start of the local LI.
1720  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1721  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1722  // local live range. We could create edges from other global uses to the local
1723  // start, but the coalescer should have already eliminated these cases, so
1724  // don't bother dealing with it.
1725  if (GlobalSegment == GlobalLI->end())
1726  return;
1727 
1728  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1729  // returned the next global segment. But if GlobalSegment overlaps with
1730  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1731  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1732  if (GlobalSegment->contains(LocalLI->beginIndex()))
1733  ++GlobalSegment;
1734 
1735  if (GlobalSegment == GlobalLI->end())
1736  return;
1737 
1738  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1739  if (GlobalSegment != GlobalLI->begin()) {
1740  // Two address defs have no hole.
1741  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1742  GlobalSegment->start)) {
1743  return;
1744  }
1745  // If the prior global segment may be defined by the same two-address
1746  // instruction that also defines LocalLI, then we can't make a hole here.
1747  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1748  LocalLI->beginIndex())) {
1749  return;
1750  }
1751  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1752  // it would be a disconnected component in the live range.
1753  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1754  "Disconnected LRG within the scheduling region.");
1755  }
1756  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1757  if (!GlobalDef)
1758  return;
1759 
1760  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1761  if (!GlobalSU)
1762  return;
1763 
1764  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1765  // constraining the uses of the last local def to precede GlobalDef.
1766  SmallVector<SUnit*,8> LocalUses;
1767  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1768  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1769  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1770  for (const SDep &Succ : LastLocalSU->Succs) {
1771  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1772  continue;
1773  if (Succ.getSUnit() == GlobalSU)
1774  continue;
1775  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1776  return;
1777  LocalUses.push_back(Succ.getSUnit());
1778  }
1779  // Open the top of the GlobalLI hole by constraining any earlier global uses
1780  // to precede the start of LocalLI.
1781  SmallVector<SUnit*,8> GlobalUses;
1782  MachineInstr *FirstLocalDef =
1783  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1784  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1785  for (const SDep &Pred : GlobalSU->Preds) {
1786  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1787  continue;
1788  if (Pred.getSUnit() == FirstLocalSU)
1789  continue;
1790  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1791  return;
1792  GlobalUses.push_back(Pred.getSUnit());
1793  }
1794  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1795  // Add the weak edges.
1796  for (SmallVectorImpl<SUnit*>::const_iterator
1797  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1798  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1799  << GlobalSU->NodeNum << ")\n");
1800  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1801  }
1802  for (SmallVectorImpl<SUnit*>::const_iterator
1803  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1804  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1805  << FirstLocalSU->NodeNum << ")\n");
1806  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1807  }
1808 }
1809 
1810 /// Callback from DAG postProcessing to create weak edges to encourage
1811 /// copy elimination.
1812 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1813  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1814  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1815 
1816  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1817  if (FirstPos == DAG->end())
1818  return;
1819  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1820  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1821  *priorNonDebug(DAG->end(), DAG->begin()));
1822 
1823  for (SUnit &SU : DAG->SUnits) {
1824  if (!SU.getInstr()->isCopy())
1825  continue;
1826 
1827  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1828  }
1829 }
1830 
1831 //===----------------------------------------------------------------------===//
1832 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1833 // and possibly other custom schedulers.
1834 //===----------------------------------------------------------------------===//
1835 
1836 static const unsigned InvalidCycle = ~0U;
1837 
1838 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1839 
1840 /// Given a Count of resource usage and a Latency value, return true if a
1841 /// SchedBoundary becomes resource limited.
1842 /// If we are checking after scheduling a node, we should return true when
1843 /// we just reach the resource limit.
1844 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1845  unsigned Latency, bool AfterSchedNode) {
1846  int ResCntFactor = (int)(Count - (Latency * LFactor));
1847  if (AfterSchedNode)
1848  return ResCntFactor >= (int)LFactor;
1849  else
1850  return ResCntFactor > (int)LFactor;
1851 }
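// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// checkResourceLimit() above, with concrete numbers. Assume a latency factor
// of 2 resource units per cycle, 10 units already consumed in the zone, and a
// scheduled latency of 4 cycles: ResCntFactor = 10 - 4*2 = 2. After scheduling
// a node (>=) the zone counts as resource limited; before scheduling (>) it
// does not.
#include <cassert>

static void checkResourceLimitExample() {
  unsigned LFactor = 2, Count = 10, Latency = 4;
  int ResCntFactor = (int)(Count - (Latency * LFactor)); // == 2
  assert(ResCntFactor >= (int)LFactor);   // after scheduling a node: limited
  assert(!(ResCntFactor > (int)LFactor)); // before scheduling: not yet limited
}
// --- End of editor's sketch ---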
1852 
1853 void SchedBoundary::reset() {
1854  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1855  // Destroying and reconstructing it is very expensive though. So keep
1856  // invalid, placeholder HazardRecs.
1857  if (HazardRec && HazardRec->isEnabled()) {
1858  delete HazardRec;
1859  HazardRec = nullptr;
1860  }
1861  Available.clear();
1862  Pending.clear();
1863  CheckPending = false;
1864  CurrCycle = 0;
1865  CurrMOps = 0;
1866  MinReadyCycle = std::numeric_limits<unsigned>::max();
1867  ExpectedLatency = 0;
1868  DependentLatency = 0;
1869  RetiredMOps = 0;
1870  MaxExecutedResCount = 0;
1871  ZoneCritResIdx = 0;
1872  IsResourceLimited = false;
1873  ReservedCycles.clear();
1874  ReservedCyclesIndex.clear();
1875 #ifndef NDEBUG
1876  // Track the maximum number of stall cycles that could arise either from the
1877  // latency of a DAG edge or the number of cycles that a processor resource is
1878  // reserved (SchedBoundary::ReservedCycles).
1879  MaxObservedStall = 0;
1880 #endif
1881  // Reserve a zero-count for invalid CritResIdx.
1882  ExecutedResCounts.resize(1);
1883  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1884 }
1885 
1886 void SchedRemainder::
1887 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1888  reset();
1889  if (!SchedModel->hasInstrSchedModel())
1890  return;
1891  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1892  for (SUnit &SU : DAG->SUnits) {
1893  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1894  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1895  * SchedModel->getMicroOpFactor();
1896  for (TargetSchedModel::ProcResIter
1897  PI = SchedModel->getWriteProcResBegin(SC),
1898  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1899  unsigned PIdx = PI->ProcResourceIdx;
1900  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1901  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1902  }
1903  }
1904 }
1905 
1906 void SchedBoundary::
1907 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1908  reset();
1909  DAG = dag;
1910  SchedModel = smodel;
1911  Rem = rem;
1912  if (SchedModel->hasInstrSchedModel()) {
1913  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1914  ReservedCyclesIndex.resize(ResourceCount);
1915  ExecutedResCounts.resize(ResourceCount);
1916  unsigned NumUnits = 0;
1917 
1918  for (unsigned i = 0; i < ResourceCount; ++i) {
1919  ReservedCyclesIndex[i] = NumUnits;
1920  NumUnits += SchedModel->getProcResource(i)->NumUnits;
1921  }
1922 
1923  ReservedCycles.resize(NumUnits, InvalidCycle);
1924  }
1925 }
1926 
1927 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1928 /// these "soft stalls" differently than the hard stall cycles based on CPU
1929 /// resources and computed by checkHazard(). A fully in-order model
1930 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1931 /// available for scheduling until they are ready. However, a weaker in-order
1932 /// model may use this for heuristics. For example, if a processor has in-order
1933 /// behavior when reading certain resources, this may come into play.
1934 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1935  if (!SU->isUnbuffered)
1936  return 0;
1937 
1938  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1939  if (ReadyCycle > CurrCycle)
1940  return ReadyCycle - CurrCycle;
1941  return 0;
1942 }
1943 
1944 /// Compute the next cycle at which the given processor resource unit
1945 /// can be scheduled.
1946 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1947  unsigned Cycles) {
1948  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1949  // If this resource has never been used, always return cycle zero.
1950  if (NextUnreserved == InvalidCycle)
1951  return 0;
1952  // For bottom-up scheduling add the cycles needed for the current operation.
1953  if (!isTop())
1954  NextUnreserved += Cycles;
1955  return NextUnreserved;
1956 }
1957 
1958 /// Compute the next cycle at which the given processor resource can be
1959 /// scheduled. Returns the next cycle and the index of the processor resource
1960 /// instance in the reserved cycles vector.
1961 std::pair<unsigned, unsigned>
1962 SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1963  unsigned MinNextUnreserved = InvalidCycle;
1964  unsigned InstanceIdx = 0;
1965  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1966  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1967  assert(NumberOfInstances > 0 &&
1968  "Cannot have zero instances of a ProcResource");
1969 
1970  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1971  ++I) {
1972  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1973  if (MinNextUnreserved > NextUnreserved) {
1974  InstanceIdx = I;
1975  MinNextUnreserved = NextUnreserved;
1976  }
1977  }
1978  return std::make_pair(MinNextUnreserved, InstanceIdx);
1979 }
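// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// getNextResourceCycle() above simply returns the unit of a resource that
// frees up earliest. A standalone equivalent over a plain array of per-unit
// ready cycles (names invented for the example):
#include <utility>
#include <vector>

static std::pair<unsigned, unsigned> // (earliest cycle, unit index)
earliestFreeUnit(const std::vector<unsigned> &UnitReadyCycle) {
  unsigned MinCycle = ~0u, Idx = 0;
  for (unsigned I = 0, E = UnitReadyCycle.size(); I != E; ++I)
    if (UnitReadyCycle[I] < MinCycle) {
      MinCycle = UnitReadyCycle[I];
      Idx = I;
    }
  return {MinCycle, Idx};
}
// e.g. earliestFreeUnit({7, 3, 5}) yields {3, 1}: unit 1 is free first.
// --- End of editor's sketch ---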
1980 
1981 /// Does this SU have a hazard within the current instruction group?
1982 ///
1983 /// The scheduler supports two modes of hazard recognition. The first is the
1984 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1985 /// supports highly complicated in-order reservation tables
1986 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1987 ///
1988 /// The second is a streamlined mechanism that checks for hazards based on
1989 /// simple counters that the scheduler itself maintains. It explicitly checks
1990 /// for instruction dispatch limitations, including the number of micro-ops that
1991 /// can dispatch per cycle.
1992 ///
1993 /// TODO: Also check whether the SU must start a new group.
1994 bool SchedBoundary::checkHazard(SUnit *SU) {
1995  if (HazardRec->isEnabled()
1996  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1997  return true;
1998  }
1999 
2000  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2001  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2002  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2003  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2004  return true;
2005  }
2006 
2007  if (CurrMOps > 0 &&
2008  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2009  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2010  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2011  << (isTop() ? "begin" : "end") << " group\n");
2012  return true;
2013  }
2014 
2015  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2016  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2017  for (const MCWriteProcResEntry &PE :
2018  make_range(SchedModel->getWriteProcResBegin(SC),
2019  SchedModel->getWriteProcResEnd(SC))) {
2020  unsigned ResIdx = PE.ProcResourceIdx;
2021  unsigned Cycles = PE.Cycles;
2022  unsigned NRCycle, InstanceIdx;
2023  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2024  if (NRCycle > CurrCycle) {
2025 #ifndef NDEBUG
2026  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2027 #endif
2028  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2029  << SchedModel->getResourceName(ResIdx)
2030  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2031  << "=" << NRCycle << "c\n");
2032  return true;
2033  }
2034  }
2035  }
2036  return false;
2037 }
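// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// The second, counter-based hazard mode in checkHazard() boils down to a
// micro-op budget per cycle. For an issue width of 4, a group that has already
// issued 3 micro-ops cannot accept a 2-micro-op instruction in the same cycle
// (helper name invented for the example):
static bool wouldExceedIssueWidth(unsigned CurrMOps, unsigned UOps,
                                  unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
}
// wouldExceedIssueWidth(3, 2, 4) -> true  (hazard: defer to the next cycle)
// wouldExceedIssueWidth(0, 6, 4) -> false (an empty group always accepts one)
// --- End of editor's sketch ---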
2038 
2039 // Find the unscheduled node in ReadySUs with the highest latency.
2040 unsigned SchedBoundary::
2041 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2042  SUnit *LateSU = nullptr;
2043  unsigned RemLatency = 0;
2044  for (SUnit *SU : ReadySUs) {
2045  unsigned L = getUnscheduledLatency(SU);
2046  if (L > RemLatency) {
2047  RemLatency = L;
2048  LateSU = SU;
2049  }
2050  }
2051  if (LateSU) {
2052  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2053  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2054  }
2055  return RemLatency;
2056 }
2057 
2058 // Count resources in this zone and the remaining unscheduled
2059 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2060 // resource index, or zero if the zone is issue limited.
2061 unsigned SchedBoundary::
2062 getOtherResourceCount(unsigned &OtherCritIdx) {
2063  OtherCritIdx = 0;
2064  if (!SchedModel->hasInstrSchedModel())
2065  return 0;
2066 
2067  unsigned OtherCritCount = Rem->RemIssueCount
2068  + (RetiredMOps * SchedModel->getMicroOpFactor());
2069  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2070  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2071  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2072  PIdx != PEnd; ++PIdx) {
2073  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2074  if (OtherCount > OtherCritCount) {
2075  OtherCritCount = OtherCount;
2076  OtherCritIdx = PIdx;
2077  }
2078  }
2079  if (OtherCritIdx) {
2080  LLVM_DEBUG(
2081  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2082  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2083  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2084  }
2085  return OtherCritCount;
2086 }
2087 
2088 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2089  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2090 
2091 #ifndef NDEBUG
2092  // ReadyCycle has been bumped up to CurrCycle when this node was
2093  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2094  // scheduling, so may now be greater than ReadyCycle.
2095  if (ReadyCycle > CurrCycle)
2096  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2097 #endif
2098 
2099  if (ReadyCycle < MinReadyCycle)
2100  MinReadyCycle = ReadyCycle;
2101 
2102  // Check for interlocks first. For the purpose of other heuristics, an
2103  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2104  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2105  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2106  Available.size() >= ReadyListLimit)
2107  Pending.push(SU);
2108  else
2109  Available.push(SU);
2110 }
2111 
2112 /// Move the boundary of scheduled code by one cycle.
2113 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2114  if (SchedModel->getMicroOpBufferSize() == 0) {
2115  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2116  "MinReadyCycle uninitialized");
2117  if (MinReadyCycle > NextCycle)
2118  NextCycle = MinReadyCycle;
2119  }
2120  // Update the current micro-ops, which will issue in the next cycle.
2121  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2122  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2123 
2124  // Decrement DependentLatency based on the next cycle.
2125  if ((NextCycle - CurrCycle) > DependentLatency)
2126  DependentLatency = 0;
2127  else
2128  DependentLatency -= (NextCycle - CurrCycle);
2129 
2130  if (!HazardRec->isEnabled()) {
2131  // Bypass HazardRec virtual calls.
2132  CurrCycle = NextCycle;
2133  } else {
2134  // Bypass getHazardType calls in case of long latency.
2135  for (; CurrCycle != NextCycle; ++CurrCycle) {
2136  if (isTop())
2137  HazardRec->AdvanceCycle();
2138  else
2139  HazardRec->RecedeCycle();
2140  }
2141  }
2142  CheckPending = true;
2143  IsResourceLimited =
2144  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2145  getScheduledLatency(), true);
2146 
2147  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2148  << '\n');
2149 }
2150 
2151 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2152  ExecutedResCounts[PIdx] += Count;
2153  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2154  MaxExecutedResCount = ExecutedResCounts[PIdx];
2155 }
2156 
2157 /// Add the given processor resource to this scheduled zone.
2158 ///
2159 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2160 /// during which this resource is consumed.
2161 ///
2162 /// \return the next cycle at which the instruction may execute without
2163 /// oversubscribing resources.
2164 unsigned SchedBoundary::
2165 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2166  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2167  unsigned Count = Factor * Cycles;
2168  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2169  << Cycles << "x" << Factor << "u\n");
2170 
2171  // Update Executed resources counts.
2172  incExecutedResources(PIdx, Count);
2173  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2174  Rem->RemainingCounts[PIdx] -= Count;
2175 
2176  // Check if this resource exceeds the current critical resource. If so, it
2177  // becomes the critical resource.
2178  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2179  ZoneCritResIdx = PIdx;
2180  LLVM_DEBUG(dbgs() << " *** Critical resource "
2181  << SchedModel->getResourceName(PIdx) << ": "
2182  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2183  << "c\n");
2184  }
2185  // For reserved resources, record the highest cycle using the resource.
2186  unsigned NextAvailable, InstanceIdx;
2187  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2188  if (NextAvailable > CurrCycle) {
2189  LLVM_DEBUG(dbgs() << " Resource conflict: "
2190  << SchedModel->getResourceName(PIdx)
2191  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2192  << " reserved until @" << NextAvailable << "\n");
2193  }
2194  return NextAvailable;
2195 }
2196 
2197 /// Move the boundary of scheduled code by one SUnit.
2198 void SchedBoundary::bumpNode(SUnit *SU) {
2199  // Update the reservation table.
2200  if (HazardRec->isEnabled()) {
2201  if (!isTop() && SU->isCall) {
2202  // Calls are scheduled with their preceding instructions. For bottom-up
2203  // scheduling, clear the pipeline state before emitting.
2204  HazardRec->Reset();
2205  }
2206  HazardRec->EmitInstruction(SU);
2207  // Scheduling an instruction may have made pending instructions available.
2208  CheckPending = true;
2209  }
2210  // checkHazard should prevent scheduling multiple instructions per cycle that
2211  // exceed the issue width.
2212  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2213  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2214  assert(
2215  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2216  "Cannot schedule this instruction's MicroOps in the current cycle.");
2217 
2218  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2219  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2220 
2221  unsigned NextCycle = CurrCycle;
2222  switch (SchedModel->getMicroOpBufferSize()) {
2223  case 0:
2224  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2225  break;
2226  case 1:
2227  if (ReadyCycle > NextCycle) {
2228  NextCycle = ReadyCycle;
2229  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2230  }
2231  break;
2232  default:
2233  // We don't currently model the OOO reorder buffer, so consider all
2234  // scheduled MOps to be "retired". We do loosely model in-order resource
2235  // latency. If this instruction uses an in-order resource, account for any
2236  // likely stall cycles.
2237  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2238  NextCycle = ReadyCycle;
2239  break;
2240  }
2241  RetiredMOps += IncMOps;
2242 
2243  // Update resource counts and critical resource.
2244  if (SchedModel->hasInstrSchedModel()) {
2245  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2246  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2247  Rem->RemIssueCount -= DecRemIssue;
2248  if (ZoneCritResIdx) {
2249  // Scale scheduled micro-ops for comparing with the critical resource.
2250  unsigned ScaledMOps =
2251  RetiredMOps * SchedModel->getMicroOpFactor();
2252 
2253  // If scaled micro-ops are now more than the previous critical resource by
2254  // a full cycle, then micro-ops issue becomes critical.
2255  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2256  >= (int)SchedModel->getLatencyFactor()) {
2257  ZoneCritResIdx = 0;
2258  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2259  << ScaledMOps / SchedModel->getLatencyFactor()
2260  << "c\n");
2261  }
2262  }
2263  for (TargetSchedModel::ProcResIter
2264  PI = SchedModel->getWriteProcResBegin(SC),
2265  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2266  unsigned RCycle =
2267  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2268  if (RCycle > NextCycle)
2269  NextCycle = RCycle;
2270  }
2271  if (SU->hasReservedResource) {
2272  // For reserved resources, record the highest cycle using the resource.
2273  // For top-down scheduling, this is the cycle in which we schedule this
2274  // instruction plus the number of cycles the operation reserves the
2275  // resource. For bottom-up, it is simply the instruction's cycle.
2276  for (TargetSchedModel::ProcResIter
2277  PI = SchedModel->getWriteProcResBegin(SC),
2278  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2279  unsigned PIdx = PI->ProcResourceIdx;
2280  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2281  unsigned ReservedUntil, InstanceIdx;
2282  std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2283  if (isTop()) {
2284  ReservedCycles[InstanceIdx] =
2285  std::max(ReservedUntil, NextCycle + PI->Cycles);
2286  } else
2287  ReservedCycles[InstanceIdx] = NextCycle;
2288  }
2289  }
2290  }
2291  }
2292  // Update ExpectedLatency and DependentLatency.
2293  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2294  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2295  if (SU->getDepth() > TopLatency) {
2296  TopLatency = SU->getDepth();
2297  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2298  << SU->NodeNum << ") " << TopLatency << "c\n");
2299  }
2300  if (SU->getHeight() > BotLatency) {
2301  BotLatency = SU->getHeight();
2302  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2303  << SU->NodeNum << ") " << BotLatency << "c\n");
2304  }
2305  // If we stall for any reason, bump the cycle.
2306  if (NextCycle > CurrCycle)
2307  bumpCycle(NextCycle);
2308  else
2309  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2310  // resource limited. If a stall occurred, bumpCycle does this.
2311  IsResourceLimited =
2312  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2313  getScheduledLatency(), true);
2314 
2315  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2316  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2317  // one cycle. Since we commonly reach the max MOps here, opportunistically
2318  // bump the cycle to avoid uselessly checking everything in the readyQ.
2319  CurrMOps += IncMOps;
2320 
2321  // Bump the cycle count for issue group constraints.
2322  // This must be done after NextCycle has been adjusted for all other stalls.
2323  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2324  // currCycle to X.
2325  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2326  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2327  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2328  << " group\n");
2329  bumpCycle(++NextCycle);
2330  }
2331 
2332  while (CurrMOps >= SchedModel->getIssueWidth()) {
2333  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2334  << CurrCycle << '\n');
2335  bumpCycle(++NextCycle);
2336  }
2337  LLVM_DEBUG(dumpScheduledState());
2338 }
2339 
2340 /// Release pending ready nodes into the available queue. This makes them
2341 /// visible to heuristics.
2342 void SchedBoundary::releasePending() {
2343  // If the available queue is empty, it is safe to reset MinReadyCycle.
2344  if (Available.empty())
2345  MinReadyCycle = std::numeric_limits<unsigned>::max();
2346 
2347  // Check to see if any of the pending instructions are ready to issue. If
2348  // so, add them to the available queue.
2349  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2350  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2351  SUnit *SU = *(Pending.begin()+i);
2352  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2353 
2354  if (ReadyCycle < MinReadyCycle)
2355  MinReadyCycle = ReadyCycle;
2356 
2357  if (!IsBuffered && ReadyCycle > CurrCycle)
2358  continue;
2359 
2360  if (checkHazard(SU))
2361  continue;
2362 
2363  if (Available.size() >= ReadyListLimit)
2364  break;
2365 
2366  Available.push(SU);
2367  Pending.remove(Pending.begin()+i);
2368  --i; --e;
2369  }
2370  CheckPending = false;
2371 }
2372 
2373 /// Remove SU from the ready set for this boundary.
2374 void SchedBoundary::removeReady(SUnit *SU) {
2375  if (Available.isInQueue(SU))
2376  Available.remove(Available.find(SU));
2377  else {
2378  assert(Pending.isInQueue(SU) && "bad ready count");
2379  Pending.remove(Pending.find(SU));
2380  }
2381 }
2382 
2383 /// If this queue only has one ready candidate, return it. As a side effect,
2384 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2385 /// one node is ready. If multiple instructions are ready, return NULL.
2386 SUnit *SchedBoundary::pickOnlyChoice() {
2387  if (CheckPending)
2388  releasePending();
2389 
2390  if (CurrMOps > 0) {
2391  // Defer any ready instrs that now have a hazard.
2392  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2393  if (checkHazard(*I)) {
2394  Pending.push(*I);
2395  I = Available.remove(I);
2396  continue;
2397  }
2398  ++I;
2399  }
2400  }
2401  for (unsigned i = 0; Available.empty(); ++i) {
2402 // FIXME: Re-enable assert once PR20057 is resolved.
2403 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2404 // "permanent hazard");
2405  (void)i;
2406  bumpCycle(CurrCycle + 1);
2407  releasePending();
2408  }
2409 
2410  LLVM_DEBUG(Pending.dump());
2411  LLVM_DEBUG(Available.dump());
2412 
2413  if (Available.size() == 1)
2414  return *Available.begin();
2415  return nullptr;
2416 }
2417 
2418 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2419 // This is useful information to dump after bumpNode.
2420 // Note that the Queue contents are more useful before pickNodeFromQueue.
2421 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2422  unsigned ResFactor;
2423  unsigned ResCount;
2424  if (ZoneCritResIdx) {
2425  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2426  ResCount = getResourceCount(ZoneCritResIdx);
2427  } else {
2428  ResFactor = SchedModel->getMicroOpFactor();
2429  ResCount = RetiredMOps * ResFactor;
2430  }
2431  unsigned LFactor = SchedModel->getLatencyFactor();
2432  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2433  << " Retired: " << RetiredMOps;
2434  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2435  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2436  << ResCount / ResFactor << " "
2437  << SchedModel->getResourceName(ZoneCritResIdx)
2438  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2439  << (IsResourceLimited ? " - Resource" : " - Latency")
2440  << " limited.\n";
2441 }
2442 #endif
2443 
2444 //===----------------------------------------------------------------------===//
2445 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2446 //===----------------------------------------------------------------------===//
2447 
2448 void GenericSchedulerBase::SchedCandidate::
2449 initResourceDelta(const ScheduleDAGMI *DAG,
2450  const TargetSchedModel *SchedModel) {
2451  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2452  return;
2453 
2454  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2455  for (TargetSchedModel::ProcResIter
2456  PI = SchedModel->getWriteProcResBegin(SC),
2457  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2458  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2459  ResDelta.CritResources += PI->Cycles;
2460  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2461  ResDelta.DemandedResources += PI->Cycles;
2462  }
2463 }
2464 
2465 /// Compute remaining latency. We need this both to determine whether the
2466 /// overall schedule has become latency-limited and whether the instructions
2467 /// outside this zone are resource or latency limited.
2468 ///
2469 /// The "dependent" latency is updated incrementally during scheduling as the
2470 /// max height/depth of scheduled nodes minus the cycles since it was
2471 /// scheduled:
2472 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2473 ///
2474 /// The "independent" latency is the max ready queue depth:
2475 /// ILat = max N.depth for N in Available|Pending
2476 ///
2477 /// RemainingLatency is the greater of independent and dependent latency.
2478 ///
2479 /// These computations are expensive, especially in DAGs with many edges, so
2480 /// only do them if necessary.
2481 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2482  unsigned RemLatency = CurrZone.getDependentLatency();
2483  RemLatency = std::max(RemLatency,
2484  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2485  RemLatency = std::max(RemLatency,
2486  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2487  return RemLatency;
2488 }
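// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// computeRemLatency() is just the max of the zone's dependent latency and the
// deepest node still waiting in either ready queue. The same computation with
// plain numbers (names invented for the example):
#include <algorithm>
#include <vector>

static unsigned remLatencySketch(unsigned DependentLatency,
                                 const std::vector<unsigned> &AvailableDepths,
                                 const std::vector<unsigned> &PendingDepths) {
  unsigned Rem = DependentLatency;
  for (unsigned D : AvailableDepths)
    Rem = std::max(Rem, D);
  for (unsigned D : PendingDepths)
    Rem = std::max(Rem, D);
  return Rem; // e.g. remLatencySketch(3, {5, 2}, {7}) == 7
}
// --- End of editor's sketch ---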
2489 
2490 /// Returns true if the current cycle plus remaining latency is greater than
2491 /// the critical path in the scheduling region.
2492 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2493  SchedBoundary &CurrZone,
2494  bool ComputeRemLatency,
2495  unsigned &RemLatency) const {
2496  // The current cycle is already greater than the critical path, so we are
2497  // already latency limited and don't need to compute the remaining latency.
2498  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2499  return true;
2500 
2501  // If we haven't scheduled anything yet, then we aren't latency limited.
2502  if (CurrZone.getCurrCycle() == 0)
2503  return false;
2504 
2505  if (ComputeRemLatency)
2506  RemLatency = computeRemLatency(CurrZone);
2507 
2508  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2509 }
2510 
2511 /// Set the CandPolicy given a scheduling zone given the current resources and
2512 /// latencies inside and outside the zone.
2513 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2514  SchedBoundary &CurrZone,
2515  SchedBoundary *OtherZone) {
2516  // Apply preemptive heuristics based on the total latency and resources
2517  // inside and outside this zone. Potential stalls should be considered before
2518  // following this policy.
2519 
2520  // Compute the critical resource outside the zone.
2521  unsigned OtherCritIdx = 0;
2522  unsigned OtherCount =
2523  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2524 
2525  bool OtherResLimited = false;
2526  unsigned RemLatency = 0;
2527  bool RemLatencyComputed = false;
2528  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2529  RemLatency = computeRemLatency(CurrZone);
2530  RemLatencyComputed = true;
2531  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2532  OtherCount, RemLatency, false);
2533  }
2534 
2535  // Schedule aggressively for latency in PostRA mode. We don't check for
2536  // acyclic latency during PostRA, and highly out-of-order processors will
2537  // skip PostRA scheduling.
2538  if (!OtherResLimited &&
2539  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2540  RemLatency))) {
2541  Policy.ReduceLatency |= true;
2542  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2543  << " RemainingLatency " << RemLatency << " + "
2544  << CurrZone.getCurrCycle() << "c > CritPath "
2545  << Rem.CriticalPath << "\n");
2546  }
2547  // If the same resource is limiting inside and outside the zone, do nothing.
2548  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2549  return;
2550 
2551  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2552  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2553  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2554  } if (OtherResLimited) dbgs()
2555  << " RemainingLimit: "
2556  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2557  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2558  << " Latency limited both directions.\n");
2559 
2560  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2561  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2562 
2563  if (OtherResLimited)
2564  Policy.DemandResIdx = OtherCritIdx;
2565 }
2566 
2567 #ifndef NDEBUG
2568 const char *GenericSchedulerBase::getReasonStr(
2569  GenericSchedulerBase::CandReason Reason) {
2570  switch (Reason) {
2571  case NoCand: return "NOCAND ";
2572  case Only1: return "ONLY1 ";
2573  case PhysReg: return "PHYS-REG ";
2574  case RegExcess: return "REG-EXCESS";
2575  case RegCritical: return "REG-CRIT ";
2576  case Stall: return "STALL ";
2577  case Cluster: return "CLUSTER ";
2578  case Weak: return "WEAK ";
2579  case RegMax: return "REG-MAX ";
2580  case ResourceReduce: return "RES-REDUCE";
2581  case ResourceDemand: return "RES-DEMAND";
2582  case TopDepthReduce: return "TOP-DEPTH ";
2583  case TopPathReduce: return "TOP-PATH ";
2584  case BotHeightReduce:return "BOT-HEIGHT";
2585  case BotPathReduce: return "BOT-PATH ";
2586  case NextDefUse: return "DEF-USE ";
2587  case NodeOrder: return "ORDER ";
2588  };
2589  llvm_unreachable("Unknown reason!");
2590 }
2591 
2592 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2593  PressureChange P;
2594  unsigned ResIdx = 0;
2595  unsigned Latency = 0;
2596  switch (Cand.Reason) {
2597  default:
2598  break;
2599  case RegExcess:
2600  P = Cand.RPDelta.Excess;
2601  break;
2602  case RegCritical:
2603  P = Cand.RPDelta.CriticalMax;
2604  break;
2605  case RegMax:
2606  P = Cand.RPDelta.CurrentMax;
2607  break;
2608  case ResourceReduce:
2609  ResIdx = Cand.Policy.ReduceResIdx;
2610  break;
2611  case ResourceDemand:
2612  ResIdx = Cand.Policy.DemandResIdx;
2613  break;
2614  case TopDepthReduce:
2615  Latency = Cand.SU->getDepth();
2616  break;
2617  case TopPathReduce:
2618  Latency = Cand.SU->getHeight();
2619  break;
2620  case BotHeightReduce:
2621  Latency = Cand.SU->getHeight();
2622  break;
2623  case BotPathReduce:
2624  Latency = Cand.SU->getDepth();
2625  break;
2626  }
2627  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2628  if (P.isValid())
2629  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2630  << ":" << P.getUnitInc() << " ";
2631  else
2632  dbgs() << " ";
2633  if (ResIdx)
2634  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2635  else
2636  dbgs() << " ";
2637  if (Latency)
2638  dbgs() << " " << Latency << " cycles ";
2639  else
2640  dbgs() << " ";
2641  dbgs() << '\n';
2642 }
2643 #endif
2644 
2645 namespace llvm {
2646 /// Return true if this heuristic determines order.
2647 bool tryLess(int TryVal, int CandVal,
2648  GenericSchedulerBase::SchedCandidate &TryCand,
2649  GenericSchedulerBase::SchedCandidate &Cand,
2650  GenericSchedulerBase::CandReason Reason) {
2651  if (TryVal < CandVal) {
2652  TryCand.Reason = Reason;
2653  return true;
2654  }
2655  if (TryVal > CandVal) {
2656  if (Cand.Reason > Reason)
2657  Cand.Reason = Reason;
2658  return true;
2659  }
2660  return false;
2661 }
2662 
2663 bool tryGreater(int TryVal, int CandVal,
2664  GenericSchedulerBase::SchedCandidate &TryCand,
2665  GenericSchedulerBase::SchedCandidate &Cand,
2666  GenericSchedulerBase::CandReason Reason) {
2667  if (TryVal > CandVal) {
2668  TryCand.Reason = Reason;
2669  return true;
2670  }
2671  if (TryVal < CandVal) {
2672  if (Cand.Reason > Reason)
2673  Cand.Reason = Reason;
2674  return true;
2675  }
2676  return false;
2677 }
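// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// tryLess/tryGreater above report "this heuristic decided" whenever the two
// values differ. The winning reason is recorded on the new candidate when it
// wins, or upgraded on the incumbent when it survives for a stronger (lower
// numbered) reason. A trimmed-down standalone version of the same control flow
// (all names invented for the example):
enum DemoReason { DemoNoCand, DemoRegExcess, DemoCluster, DemoNodeOrder };

struct DemoCand { DemoReason Reason = DemoNoCand; };

static bool demoTryLess(int TryVal, int CandVal, DemoCand &TryCand,
                        DemoCand &Cand, DemoReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason; // the new candidate wins on this heuristic
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;  // the incumbent survives, for a stronger reason
    return true;
  }
  return false;              // tie: fall through to the next heuristic
}
// --- End of editor's sketch ---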
2678 
2679 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2680  GenericSchedulerBase::SchedCandidate &Cand,
2681  SchedBoundary &Zone) {
2682  if (Zone.isTop()) {
2683  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2684  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2685  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2686  return true;
2687  }
2688  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2689  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2690  return true;
2691  } else {
2692  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2693  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2694  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2695  return true;
2696  }
2697  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2698  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2699  return true;
2700  }
2701  return false;
2702 }
2703 } // end namespace llvm
2704 
2705 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2706  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2707  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2708 }
2709 
2710 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2711  tracePick(Cand.Reason, Cand.AtTop);
2712 }
2713 
2714 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2715  assert(dag->hasVRegLiveness() &&
2716  "(PreRA)GenericScheduler needs vreg liveness");
2717  DAG = static_cast<ScheduleDAGMILive*>(dag);
2718  SchedModel = DAG->getSchedModel();
2719  TRI = DAG->TRI;
2720 
2721  Rem.init(DAG, SchedModel);
2722  Top.init(DAG, SchedModel, &Rem);
2723  Bot.init(DAG, SchedModel, &Rem);
2724 
2725  // Initialize resource counts.
2726 
2727  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2728  // are disabled, then these HazardRecs will be disabled.
2729  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2730  if (!Top.HazardRec) {
2731  Top.HazardRec =
2732  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2733  Itin, DAG);
2734  }
2735  if (!Bot.HazardRec) {
2736  Bot.HazardRec =
2737  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2738  Itin, DAG);
2739  }
2740  TopCand.SU = nullptr;
2741  BotCand.SU = nullptr;
2742 }
2743 
2744 /// Initialize the per-region scheduling policy.
2745 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2746  MachineBasicBlock::iterator End,
2747  unsigned NumRegionInstrs) {
2748  const MachineFunction &MF = *Begin->getMF();
2749  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2750 
2751  // Avoid setting up the register pressure tracker for small regions to save
2752  // compile time. As a rough heuristic, only track pressure when the number of
2753  // schedulable instructions exceeds half the integer register file.
2754  RegionPolicy.ShouldTrackPressure = true;
2755  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2756  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2757  if (TLI->isTypeLegal(LegalIntVT)) {
2758  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2759  TLI->getRegClassFor(LegalIntVT));
2760  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2761  }
2762  }
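// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// The loop above enables pressure tracking only when a region is large enough
// to plausibly stress the integer register file. For example, with 14
// allocatable registers of the widest legal integer type (an assumed number),
// tracking kicks in for regions of more than 7 instructions:
static bool shouldTrackPressureSketch(unsigned NumRegionInstrs,
                                      unsigned NumAllocatableIntRegs) {
  return NumRegionInstrs > NumAllocatableIntRegs / 2;
}
// shouldTrackPressureSketch(5, 14)  -> false (small region; save compile time)
// shouldTrackPressureSketch(20, 14) -> true  (worth paying for the tracker)
// --- End of editor's sketch ---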
2763 
2764  // For generic targets, we default to bottom-up, because it's simpler and
2765  // because more compile-time optimizations have been implemented in that direction.
2766  RegionPolicy.OnlyBottomUp = true;
2767 
2768  // Allow the subtarget to override default policy.
2769  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2770 
2771  // After subtarget overrides, apply command line options.
2772  if (!EnableRegPressure) {
2773  RegionPolicy.ShouldTrackPressure = false;
2774  RegionPolicy.ShouldTrackLaneMasks = false;
2775  }
2776 
2777  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2778  // e.g. -misched-bottomup=false allows scheduling in both directions.
2779  assert((!ForceTopDown || !ForceBottomUp) &&
2780  "-misched-topdown incompatible with -misched-bottomup");
2781  if (ForceBottomUp.getNumOccurrences() > 0) {
2782  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2783  if (RegionPolicy.OnlyBottomUp)
2784  RegionPolicy.OnlyTopDown = false;
2785  }
2786  if (ForceTopDown.getNumOccurrences() > 0) {
2787  RegionPolicy.OnlyTopDown = ForceTopDown;
2788  if (RegionPolicy.OnlyTopDown)
2789  RegionPolicy.OnlyBottomUp = false;
2790  }
2791 }
2792 
2793 void GenericScheduler::dumpPolicy() const {
2794  // Cannot completely remove virtual function even in release mode.
2795 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2796  dbgs() << "GenericScheduler RegionPolicy: "
2797  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2798  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2799  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2800  << "\n";
2801 #endif
2802 }
2803 
2804 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2805 /// critical path by more cycles than it takes to drain the instruction buffer.
2806 /// We estimate an upper bound on in-flight instructions as:
2807 ///
2808 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2809 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2810 /// InFlightResources = InFlightIterations * LoopResources
2811 ///
2812 /// TODO: Check execution resources in addition to IssueCount.
2813 void GenericScheduler::checkAcyclicLatency() {
2814  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2815  return;
2816 
2817  // Scaled number of cycles per loop iteration.
2818  unsigned IterCount =
2819  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2820  Rem.RemIssueCount);
2821  // Scaled acyclic critical path.
2822  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2823  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2824  unsigned InFlightCount =
2825  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2826  unsigned BufferLimit =
2827  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2828 
2829  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2830 
2831  LLVM_DEBUG(
2832  dbgs() << "IssueCycles="
2833  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2834  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2835  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2836  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2837  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2838  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2839 }
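// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// checkAcyclicLatency() above with made-up numbers. Suppose the latency and
// micro-op factors are both 1, CyclicCritPath = 10, CriticalPath = 40,
// RemIssueCount = 20 micro-ops, and MicroOpBufferSize = 32:
#include <algorithm>

static bool acyclicLatencyLimitedExample() {
  unsigned LatencyFactor = 1, MicroOpFactor = 1;
  unsigned CyclicCritPath = 10, CriticalPath = 40, RemIssueCount = 20;
  unsigned MicroOpBufferSize = 32;

  unsigned IterCount =
      std::max(CyclicCritPath * LatencyFactor, RemIssueCount);    // 20
  unsigned AcyclicCount = CriticalPath * LatencyFactor;           // 40
  unsigned InFlightCount =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount; // 40
  unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;       // 32
  return InFlightCount > BufferLimit; // true: acyclic latency limited
}
// --- End of editor's sketch ---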
2840 
2841 void GenericScheduler::registerRoots() {
2842  Rem.CriticalPath = DAG->ExitSU.getDepth();
2843 
2844  // Some roots may not feed into ExitSU. Check all of them in case.
2845  for (const SUnit *SU : Bot.Available) {
2846  if (SU->getDepth() > Rem.CriticalPath)
2847  Rem.CriticalPath = SU->getDepth();
2848  }
2849  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2850  if (DumpCriticalPathLength) {
2851  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2852  }
2853 
2854  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2855  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2856  checkAcyclicLatency();
2857  }
2858 }
2859 
2860 namespace llvm {
2861 bool tryPressure(const PressureChange &TryP,
2862  const PressureChange &CandP,
2863  GenericSchedulerBase::SchedCandidate &TryCand,
2864  GenericSchedulerBase::SchedCandidate &Cand,
2865  GenericSchedulerBase::CandReason Reason,
2866  const TargetRegisterInfo *TRI,
2867  const MachineFunction &MF) {
2868  // If one candidate decreases and the other increases, go with it.
2869  // Invalid candidates have UnitInc==0.
2870  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2871  Reason)) {
2872  return true;
2873  }
2874  // Do not compare the magnitude of pressure changes between top and bottom
2875  // boundary.
2876  if (Cand.AtTop != TryCand.AtTop)
2877  return false;
2878 
2879  // If both candidates affect the same set in the same boundary, go with the
2880  // smallest increase.
2881  unsigned TryPSet = TryP.getPSetOrMax();
2882  unsigned CandPSet = CandP.getPSetOrMax();
2883  if (TryPSet == CandPSet) {
2884  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2885  Reason);
2886  }
2887 
2888  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2889  std::numeric_limits<int>::max();
2890 
2891  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2892  std::numeric_limits<int>::max();
2893 
2894  // If the candidates are decreasing pressure, reverse priority.
2895  if (TryP.getUnitInc() < 0)
2896  std::swap(TryRank, CandRank);
2897  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2898 }
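// --- Editor's illustrative sketch (not part of MachineScheduler.cpp) ---
// The decision order of tryPressure() above, reduced to plain integers and
// ignoring the top/bottom boundary check. A negative unit increment means the
// candidate lowers pressure in that set (names invented for the example;
// return value: 1 take the new candidate, -1 keep the incumbent, 0 undecided):
#include <utility>

static int comparePressureSketch(int TryInc, int CandInc,
                                 unsigned TryPSet, unsigned CandPSet,
                                 int TryRank, int CandRank) {
  // 1. Prefer whichever candidate decreases pressure at all.
  if ((TryInc < 0) != (CandInc < 0))
    return TryInc < 0 ? 1 : -1;
  // 2. Same pressure set: prefer the smaller increase.
  if (TryPSet == CandPSet)
    return TryInc < CandInc ? 1 : (TryInc > CandInc ? -1 : 0);
  // 3. Different sets: compare the sets' scores, reversing the comparison
  //    when both candidates are decreasing pressure.
  if (TryInc < 0)
    std::swap(TryRank, CandRank);
  return TryRank > CandRank ? 1 : (TryRank < CandRank ? -1 : 0);
}
// --- End of editor's sketch ---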
2899 
2900 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2901  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2902 }
2903 
2904 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2905 /// their physreg def/use.
2906 ///
2907 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2908 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2909 /// with the operation that produces or consumes the physreg. We'll do this when
2910 /// regalloc has support for parallel copies.
2911 int biasPhysReg(const SUnit *SU, bool isTop) {
2912  const MachineInstr *MI = SU->getInstr();
2913 
2914  if (MI->isCopy()) {
2915  unsigned ScheduledOper = isTop ? 1 : 0;
2916  unsigned UnscheduledOper = isTop ? 0 : 1;
2917  // If we have already scheduled the physreg produce/consumer, immediately
2918  // schedule the copy.
2919  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
2920  return 1;
2921  // If the physreg is at the boundary, defer it. Otherwise schedule it
2922  // immediately to free the dependent. We can hoist the copy later.
2923  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2924  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
2925  return AtBoundary ? -1 : 1;
2926  }
2927 
2928  if (MI->isMoveImmediate()) {
2929  // If we have a move immediate and all successors have been assigned, bias
2930  // towards scheduling this later. Make sure all register defs are to
2931  // physical registers.
2932  bool DoBias = true;
2933  for (const MachineOperand &Op : MI->defs()) {
2934  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
2935  DoBias = false;
2936  break;
2937  }
2938  }
2939 
2940  if (DoBias)
2941  return isTop ? -1 : 1;
2942  }
2943 
2944  return 0;
2945 }
2946 } // end namespace llvm
2947 
2948 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2949  bool AtTop,
2950  const RegPressureTracker &RPTracker,
2951  RegPressureTracker &TempTracker) {
2952  Cand.SU = SU;
2953  Cand.AtTop = AtTop;
2954  if (DAG->isTrackingPressure()) {
2955  if (AtTop) {
2956  TempTracker.getMaxDownwardPressureDelta(
2957  Cand.SU->getInstr(),
2958  Cand.RPDelta,
2959  DAG->getRegionCriticalPSets(),
2960  DAG->getRegPressure().MaxSetPressure);
2961  } else {
2962  if (VerifyScheduling) {
2963  TempTracker.getMaxUpwardPressureDelta(
2964  Cand.SU->getInstr(),
2965  &DAG->getPressureDiff(Cand.SU),
2966  Cand.RPDelta,
2967  DAG->getRegionCriticalPSets(),
2968  DAG->getRegPressure().MaxSetPressure);
2969  } else {
2970  RPTracker.getUpwardPressureDelta(
2971  Cand.SU->getInstr(),
2972  DAG->getPressureDiff(Cand.SU),
2973  Cand.RPDelta,
2974  DAG->getRegionCriticalPSets(),
2975  DAG->getRegPressure().MaxSetPressure);
2976  }
2977  }
2978  }
2979  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2980  << " Try SU(" << Cand.SU->NodeNum << ") "
2981  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2982  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2983 }
2984 
2985 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2986 /// hierarchical. This may be more efficient than a graduated cost model because
2987 /// we don't need to evaluate all aspects of the model for each node in the
2988 /// queue. But it's really done to make the heuristics easier to debug and
2989 /// statistically analyze.
2990 ///
2991 /// \param Cand provides the policy and current best candidate.
2992 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2993 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2994 // if Cand is from a different zone than TryCand.
2995 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2996  SchedCandidate &TryCand,
2997  SchedBoundary *Zone) const {
2998  // Initialize the candidate if needed.
2999  if (!Cand.isValid()) {
3000  TryCand.Reason = NodeOrder;
3001  return;
3002  }
3003 
3004  // Bias PhysReg defs and copies toward their uses and defs, respectively.
3005  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3006  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3007  return;
3008 
3009  // Avoid exceeding the target's limit.
3010  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3011  Cand.RPDelta.Excess,
3012  TryCand, Cand, RegExcess, TRI,
3013  DAG->MF))
3014  return;
3015 
3016  // Avoid increasing the max critical pressure in the scheduled region.
3017  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3018  Cand.RPDelta.CriticalMax,
3019  TryCand, Cand, RegCritical, TRI,
3020  DAG->MF))
3021  return;
3022 
3023  // We only compare a subset of features when comparing nodes between
3024  // Top and Bottom boundary. Some properties are simply incomparable, in many
3025  // other instances we should only override the other boundary if something
3026  // is a clear good pick on one boundary. Skip heuristics that are more
3027  // "tie-breaking" in nature.
3028  bool SameBoundary = Zone != nullptr;
3029  if (SameBoundary) {
3030  // For loops that are acyclic path limited, aggressively schedule for
3031  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3032  // heuristics to take precedence.
3033  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3034  tryLatency(TryCand, Cand, *Zone))
3035  return;
3036 
3037  // Prioritize instructions that read unbuffered resources by stall cycles.
3038  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3039  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3040  return;
3041  }
3042 
3043  // Keep clustered nodes together to encourage downstream peephole
3044  // optimizations which may reduce resource requirements.
3045  //
3046  // This is a best effort to set things up for a post-RA pass. Optimizations
3047  // like generating loads of multiple registers should ideally be done within
3048  // the scheduler pass by combining the loads during DAG postprocessing.
3049  const SUnit *CandNextClusterSU =
3050  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3051  const SUnit *TryCandNextClusterSU =
3052  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3053  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3054  Cand.SU == CandNextClusterSU,
3055  TryCand, Cand, Cluster))
3056  return;
3057 
3058  if (SameBoundary) {
3059  // Weak edges are for clustering and other constraints.
3060  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3061  getWeakLeft(Cand.SU, Cand.AtTop),
3062  TryCand, Cand, Weak))
3063  return;
3064  }
3065 
3066  // Avoid increasing the max pressure of the entire region.
3067  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3068  Cand.RPDelta.CurrentMax,
3069  TryCand, Cand, RegMax, TRI,
3070  DAG->MF))
3071  return;
3072 
3073  if (SameBoundary) {
3074  // Avoid critical resource consumption and balance the schedule.
3075  TryCand.initResourceDelta(DAG, SchedModel);
3076  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3077  TryCand, Cand, ResourceReduce))
3078  return;
3079  if (tryGreater(TryCand.ResDelta.DemandedResources,
3080  Cand.ResDelta.DemandedResources,
3081  TryCand, Cand, ResourceDemand))
3082  return;
3083 
3084  // Avoid serializing long latency dependence chains.
3085  // For acyclic path limited loops, latency was already checked above.
3086  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3087  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3088  return;
3089 
3090  // Fall through to original instruction order.
3091  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3092  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3093  TryCand.Reason = NodeOrder;
3094  }
3095  }
3096 }
3097 
3098 /// Pick the best candidate from the queue.
3099 ///
3100 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3101 /// DAG building. To adjust for the current scheduling location we need to
3102 /// maintain the number of vreg uses remaining to be top-scheduled.
3103 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3104  const CandPolicy &ZonePolicy,
3105  const RegPressureTracker &RPTracker,
3106  SchedCandidate &Cand) {
3107  // getMaxPressureDelta temporarily modifies the tracker.
3108  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3109 
3110  ReadyQueue &Q = Zone.Available;
3111  for (SUnit *SU : Q) {
3112 
3113  SchedCandidate TryCand(ZonePolicy);
3114  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3115  // Pass SchedBoundary only when comparing nodes from the same boundary.
3116  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3117  tryCandidate(Cand, TryCand, ZoneArg);
3118  if (TryCand.Reason != NoCand) {
3119  // Initialize resource delta if needed in case future heuristics query it.
3120  if (TryCand.ResDelta == SchedResourceDelta())
3121  TryCand.initResourceDelta(DAG, SchedModel);
3122  Cand.setBest(TryCand);
3123  LLVM_DEBUG(traceCandidate(Cand));
3124  }
3125  }
3126 }
3127 
3128 /// Pick the best candidate node from either the top or bottom queue.
3129 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3130  // Schedule as far as possible in the direction of no choice. This is most
3131  // efficient, but also provides the best heuristics for CriticalPSets.
3132  if (SUnit *SU = Bot.pickOnlyChoice()) {
3133  IsTopNode = false;
3134  tracePick(Only1, false);
3135  return SU;
3136  }
3137  if (SUnit *SU = Top.pickOnlyChoice()) {
3138  IsTopNode = true;
3139  tracePick(Only1, true);
3140  return SU;
3141  }
3142  // Set the bottom-up policy based on the state of the current bottom zone and
3143  // the instructions outside the zone, including the top zone.
3144  CandPolicy BotPolicy;
3145  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3146  // Set the top-down policy based on the state of the current top zone and
3147  // the instructions outside the zone, including the bottom zone.
3148  CandPolicy TopPolicy;
3149  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3150 
3151  // See if BotCand is still valid (because we previously scheduled from Top).
3152  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3153  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3154  BotCand.Policy != BotPolicy) {
3155  BotCand.reset(CandPolicy());
3156  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3157  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3158  } else {
3159  LLVM_DEBUG(traceCandidate(BotCand));
3160 #ifndef NDEBUG
3161  if (VerifyScheduling) {
3162  SchedCandidate TCand;
3163  TCand.reset(CandPolicy());
3164  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3165  assert(TCand.SU == BotCand.SU &&
3166  "Last pick result should correspond to re-picking right now");
3167  }
3168 #endif
3169  }
3170 
3171  // Check if the top Q has a better candidate.
3172  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3173  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3174  TopCand.Policy != TopPolicy) {
3175  TopCand.reset(CandPolicy());
3176  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3177  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3178  } else {
3179  LLVM_DEBUG(traceCandidate(TopCand));
3180 #ifndef NDEBUG
3181  if (VerifyScheduling) {
3182  SchedCandidate TCand;
3183  TCand.reset(CandPolicy());
3184  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3185  assert(TCand.SU == TopCand.SU &&
3186  "Last pick result should correspond to re-picking right now");
3187  }
3188 #endif
3189  }
3190 
3191  // Pick best from BotCand and TopCand.
3192  assert(BotCand.isValid());
3193  assert(TopCand.isValid());
3194  SchedCandidate Cand = BotCand;
3195  TopCand.Reason = NoCand;
3196  tryCandidate(Cand, TopCand, nullptr);
3197  if (TopCand.Reason != NoCand) {
3198  Cand.setBest(TopCand);
3199  LLVM_DEBUG(traceCandidate(Cand));
3200  }
3201 
3202  IsTopNode = Cand.AtTop;
3203  tracePick(Cand);
3204  return Cand.SU;
3205 }
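// Editor's note (not part of MachineScheduler.cpp): the final pick above reuses
// the tryCandidate protocol to arbitrate between the two zones. Cand starts as
// the bottom candidate, TopCand's Reason is cleared to NoCand, and TopCand is
// then offered as the "try" candidate with no SchedBoundary, so the per-zone
// heuristics are skipped; if any remaining heuristic sets TopCand.Reason, the
// top-down pick wins and replaces Cand.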
3206 
3207 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3208 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3209  if (DAG->top() == DAG->bottom()) {
3210  assert(Top.Available.empty() && Top.Pending.empty() &&
3211  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3212  return nullptr;
3213  }
3214  SUnit *SU;
3215  do {
3216  if (RegionPolicy.OnlyTopDown) {
3217  SU = Top.pickOnlyChoice();
3218  if (!SU) {
3219  CandPolicy NoPolicy;
3220  TopCand.reset(NoPolicy);
3221  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3222  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3223  tracePick(TopCand);
3224  SU = TopCand.SU;
3225  }
3226  IsTopNode = true;
3227  } else if (RegionPolicy.OnlyBottomUp) {
3228  SU = Bot.pickOnlyChoice();
3229  if (!SU) {
3230  CandPolicy NoPolicy;
3231  BotCand.reset(NoPolicy);
3232  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3233  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3234  tracePick(BotCand);
3235  SU = BotCand.SU;
3236  }
3237  IsTopNode = false;
3238  } else {
3239  SU = pickNodeBidirectional(IsTopNode);
3240  }
3241  } while (SU->isScheduled);
3242 
3243  if (SU->isTopReady())
3244  Top.removeReady(SU);
3245  if (SU->isBottomReady())
3246  Bot.removeReady(SU);
3247 
3248  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3249  << *SU->getInstr());
3250  return SU;
3251 }
3252 
3253 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3254  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3255  if (!isTop)
3256  ++InsertPos;
3257  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3258 
3259  // Find already scheduled copies with a single physreg dependence and move
3260  // them just above the scheduled instruction.
3261  for (SDep &Dep : Deps) {
3262  if (Dep.getKind() != SDep::Data ||
3263  !Register::isPhysicalRegister(Dep.getReg()))
3264  continue;
3265  SUnit *DepSU = Dep.getSUnit();
3266  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3267  continue;
3268  MachineInstr *Copy = DepSU->getInstr();
3269  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3270  continue;
3271  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3272  DAG->dumpNode(*Dep.getSUnit()));
3273  DAG->moveInstruction(Copy, InsertPos);
3274  }
3275 }
3276 
3277 /// Update the scheduler's state after scheduling a node. This is the same node
3278 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3279 /// update its state based on the current cycle before MachineSchedStrategy
3280 /// does.
3281 ///
3282 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3283 /// them here. See comments in biasPhysReg.
3284 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3285  if (IsTopNode) {
3286  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3287  Top.bumpNode(SU);
3288  if (SU->hasPhysRegUses)
3289  reschedulePhysReg(SU, true);
3290  } else {
3291  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3292  Bot.bumpNode(SU);
3293  if (SU->hasPhysRegDefs)
3294  reschedulePhysReg(SU, false);
3295  }
3296 }
3297 
3298 /// Create the standard converging machine scheduler. This will be used as the
3299 /// default scheduler if the target does not set a default.
3300 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3301  ScheduleDAGMILive *DAG =
3302  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3303  // Register DAG post-processors.
3304  //
3305  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3306  // data and pass it to later mutations. Have a single mutation that gathers
3307  // the interesting nodes in one pass.
3308  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3309  return DAG;
3310 }
3311 
3312 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3313  return createGenericSchedLive(C);
3314 }
3315 
3316 static MachineSchedRegistry
3317 GenericSchedRegistry("converge", "Standard converging scheduler.",
3318  createConveringSched);
3319 
3320 //===----------------------------------------------------------------------===//
3321 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3322 //===----------------------------------------------------------------------===//
3323 
3324 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3325  DAG = Dag;
3326  SchedModel = DAG->getSchedModel();
3327  TRI = DAG->TRI;
3328 
3329  Rem.init(DAG, SchedModel);
3330  Top.init(DAG, SchedModel, &Rem);
3331  BotRoots.clear();
3332 
3333  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3334  // or are disabled, then these HazardRecs will be disabled.
3335  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3336  if (!Top.HazardRec) {
3337  Top.HazardRec =
3338  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3339  Itin, DAG);
3340  }
3341 }
3342 
3343 void PostGenericScheduler::registerRoots() {
3344  Rem.CriticalPath = DAG->ExitSU.getDepth();
3345 
3346  // Some roots may not feed into ExitSU. Check all of them in case.
3347  for (const SUnit *SU : BotRoots) {
3348  if (SU->getDepth() > Rem.CriticalPath)
3349  Rem.CriticalPath = SU->getDepth();
3350  }
3351  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3352  if (DumpCriticalPathLength) {
3353  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3354  }
3355 }
3356 
3357 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3358 ///
3359 /// \param Cand provides the policy and current best candidate.
3360 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3361 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3362  SchedCandidate &TryCand) {
3363  // Initialize the candidate if needed.
3364  if (!Cand.isValid()) {
3365  TryCand.Reason = NodeOrder;
3366  return;
3367  }
3368 
3369  // Prioritize instructions that read unbuffered resources by stall cycles.
3370  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3371  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3372  return;
3373 
3374  // Keep clustered nodes together.
3375  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3376  Cand.SU == DAG->getNextClusterSucc(),
3377  TryCand, Cand, Cluster))
3378  return;
3379 
3380  // Avoid critical resource consumption and balance the schedule.
3381  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3382  TryCand, Cand, ResourceReduce))
3383  return;
3384  if (tryGreater(TryCand.ResDelta.DemandedResources,
3385  Cand.ResDelta.DemandedResources,
3386  TryCand, Cand, ResourceDemand))
3387  return;
3388 
3389  // Avoid serializing long latency dependence chains.
3390  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3391  return;
3392  }
3393 
3394  // Fall through to original instruction order.
3395  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3396  TryCand.Reason = NodeOrder;
3397 }
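// Editor's note (not part of MachineScheduler.cpp): compared with the pre-RA
// GenericScheduler::tryCandidate above, this post-RA variant drops the
// register-pressure and physreg-copy heuristics, since registers have already
// been assigned by the time PostGenericScheduler runs; only stalls, clustering,
// resource balance, latency, and original source order remain.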
3398 
3399 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3400  ReadyQueue &Q = Top.Available;
3401  for (SUnit *SU : Q) {
3402  SchedCandidate TryCand(Cand.Policy);
3403  TryCand.SU = SU;
3404  TryCand.AtTop = true;
3405  TryCand.initResourceDelta(DAG, SchedModel);
3406  tryCandidate(Cand, TryCand);
3407  if (TryCand.Reason != NoCand) {
3408  Cand.setBest(TryCand);
3409  LLVM_DEBUG(traceCandidate(Cand));
3410  }
3411  }
3412 }
3413 
3414 /// Pick the next node to schedule.
3415 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3416  if (DAG->top() == DAG->bottom()) {
3417  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3418  return nullptr;
3419  }
3420  SUnit *SU;
3421  do {
3422  SU = Top.pickOnlyChoice();
3423  if (SU) {
3424  tracePick(Only1, true);
3425  } else {
3426  CandPolicy NoPolicy;
3427  SchedCandidate TopCand(NoPolicy);
3428  // Set the top-down policy based on the state of the current top zone and
3429  // the instructions outside the zone, including the bottom zone.
3430  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3431  pickNodeFromQueue(TopCand);
3432  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3433  tracePick(TopCand);
3434  SU = TopCand.SU;
3435  }
3436  } while (SU->isScheduled);
3437 
3438  IsTopNode = true;
3439  Top.removeReady(SU);
3440 
3441  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3442  << *SU->getInstr());
3443  return SU;
3444 }
3445 
3446 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3447 /// scheduled/remaining flags in the DAG nodes.
3448 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3449  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3450  Top.bumpNode(SU);
3451 }
3452 
3453 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3454  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3455  /*RemoveKillFlags=*/true);
3456 }
3457 
3458 //===----------------------------------------------------------------------===//
3459 // ILP Scheduler. Currently for experimental analysis of heuristics.
3460 //===----------------------------------------------------------------------===//
3461 
3462 namespace {
3463 
3464 /// Order nodes by the ILP metric.
3465 struct ILPOrder {
3466  const SchedDFSResult *DFSResult = nullptr;
3467  const BitVector *ScheduledTrees = nullptr;
3468  bool MaximizeILP;
3469 
3470  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3471 
3472  /// Apply a less-than relation on node priority.
3473  ///
3474  /// (Return true if A comes after B in the Q.)
3475  bool operator()(const SUnit *A, const SUnit *B) const {
3476  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3477  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3478  if (SchedTreeA != SchedTreeB) {
3479  // Unscheduled trees have lower priority.
3480  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3481  return ScheduledTrees->test(SchedTreeB);
3482 
3483  // Trees with shallower connections have lower priority.
3484  if (DFSResult->getSubtreeLevel(SchedTreeA)
3485  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3486  return DFSResult->getSubtreeLevel(SchedTreeA)
3487  < DFSResult->getSubtreeLevel(SchedTreeB);
3488  }
3489  }
3490  if (MaximizeILP)
3491  return DFSResult->getILP(A) < DFSResult->getILP(B);
3492  else
3493  return DFSResult->getILP(A) > DFSResult->getILP(B);
3494  }
3495 };
3496 
3497 /// Schedule based on the ILP metric.
3498 class ILPScheduler : public MachineSchedStrategy {
3499  ScheduleDAGMILive *DAG = nullptr;
3500  ILPOrder Cmp;
3501 
3502  std::vector<SUnit*> ReadyQ;
3503 
3504 public:
3505  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3506 
3507  void initialize(ScheduleDAGMI *dag) override {
3508  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3509  DAG = static_cast<ScheduleDAGMILive*>(dag);
3510  DAG->computeDFSResult();
3511  Cmp.DFSResult = DAG->getDFSResult();
3512  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3513  ReadyQ.clear();
3514  }
3515 
3516  void registerRoots() override {
3517  // Restore the heap in ReadyQ with the updated DFS results.
3518  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3519  }
3520 
3521  /// Implement MachineSchedStrategy interface.
3522  /// -----------------------------------------
3523 
3524  /// Callback to select the highest priority node from the ready Q.
3525  SUnit *pickNode(bool &IsTopNode) override {
3526  if (ReadyQ.empty()) return nullptr;
3527  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3528  SUnit *SU = ReadyQ.back();
3529  ReadyQ.pop_back();
3530  IsTopNode = false;
3531  LLVM_DEBUG(dbgs() << "Pick node "
3532  << "SU(" << SU->NodeNum << ") "
3533  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3534  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3535  << " @"
3536  << DAG->getDFSResult()->getSubtreeLevel(
3537  DAG->getDFSResult()->getSubtreeID(SU))
3538  << '\n'
3539  << "Scheduling " << *SU->getInstr());
3540  return SU;
3541  }
3542 
3543  /// Scheduler callback to notify that a new subtree is scheduled.
3544  void scheduleTree(unsigned SubtreeID) override {
3545  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3546  }
3547 
3548  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3549  /// DFSResults, and resort the priority Q.
3550  void schedNode(SUnit *SU, bool IsTopNode) override {
3551  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3552  }
3553 
3554  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3555 
3556  void releaseBottomNode(SUnit *SU) override {
3557  ReadyQ.push_back(SU);
3558  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3559  }
3560 };
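// Editor's note (not part of MachineScheduler.cpp): ReadyQ is maintained as a
// std::make_heap/push_heap/pop_heap max-heap ordered by ILPOrder, so pickNode()
// always pops the SUnit the comparator currently ranks highest. Whenever the
// DFS results or the set of scheduled subtrees change (registerRoots,
// scheduleTree), the whole heap is rebuilt with std::make_heap because the
// relative order of the remaining entries may have changed.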
3561 
3562 } // end anonymous namespace
3563 
3564 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3565  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3566 }
3567 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3568  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3569 }
3570 
3571 static MachineSchedRegistry ILPMaxRegistry(
3572  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3573 static MachineSchedRegistry ILPMinRegistry(
3574  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
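// Editor's note (not part of MachineScheduler.cpp): schedulers registered with
// MachineSchedRegistry can be selected from the command line through the
// "misched" option, e.g. passing -misched=ilpmax or -misched=ilpmin to llc to
// exercise these experimental ILP strategies.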
3575 
3576 //===----------------------------------------------------------------------===//
3577 // Machine Instruction Shuffler for Correctness Testing
3578 //===----------------------------------------------------------------------===//
3579 
3580 #ifndef NDEBUG
3581 namespace {
3582 
3583 /// Apply a less-than relation on the node order, which corresponds to the
3584 /// instruction order prior to scheduling. IsReverse implements greater-than.
3585 template<bool IsReverse>
3586 struct SUnitOrder {
3587  bool operator()(SUnit *A, SUnit *B) const {
3588  if (IsReverse)
3589  return A->NodeNum > B->NodeNum;
3590  else
3591  return A->NodeNum < B->NodeNum;
3592  }
3593 };
3594 
3595 /// Reorder instructions as much as possible.
3596 class InstructionShuffler : public MachineSchedStrategy {
3597  bool IsAlternating;
3598  bool IsTopDown;
3599 
3600  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3601  // gives nodes with a higher number higher priority causing the latest
3602  // instructions to be scheduled first.
3603  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3604  TopQ;
3605 
3606  // When scheduling bottom-up, use greater-than as the queue priority.
3607  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3608  BottomQ;
3609 
3610 public:
3611  InstructionShuffler(bool alternate, bool topdown)
3612  : IsAlternating(alternate), IsTopDown(topdown) {}
3613 
3614  void initialize(ScheduleDAGMI*) override {
3615  TopQ.clear();
3616  BottomQ.clear();
3617  }
3618 
3619  /// Implement MachineSchedStrategy interface.
3620  /// -----------------------------------------
3621 
3622  SUnit *pickNode(bool &IsTopNode) override {
3623  SUnit *SU;
3624  if (IsTopDown) {
3625  do {
3626  if (TopQ.empty()) return nullptr;
3627  SU = TopQ.top();
3628  TopQ.pop();
3629  } while (SU->isScheduled);
3630  IsTopNode = true;
3631  } else {
3632  do {
3633  if (BottomQ.empty()) return nullptr;
3634  SU = BottomQ.top();
3635  BottomQ.pop();
3636  } while (SU->isScheduled);
3637  IsTopNode = false;
3638  }
3639  if (IsAlternating)
3640  IsTopDown = !IsTopDown;
3641  return SU;
3642  }
3643 
3644  void schedNode(SUnit *SU, bool IsTopNode) override {}
3645 
3646  void releaseTopNode(SUnit *SU) override {
3647  TopQ.push(SU);
3648  }
3649  void releaseBottomNode(SUnit *SU) override {
3650  BottomQ.push(SU);
3651  }
3652 };
3653 
3654 } // end anonymous namespace
3655 
3656 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3657  bool Alternate = !ForceTopDown && !ForceBottomUp;
3658  bool TopDown = !ForceBottomUp;
3659  assert((TopDown || !ForceTopDown) &&
3660  "-misched-topdown incompatible with -misched-bottomup");
3661  return new ScheduleDAGMILive(
3662  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3663 }
3664 
3665 static MachineSchedRegistry ShufflerRegistry(
3666  "shuffle", "Shuffle machine instructions alternating directions",
3667  createInstructionShuffler);
3668 #endif // !NDEBUG
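// Editor's note (not part of MachineScheduler.cpp): because the shuffler is
// compiled only when NDEBUG is not defined, -misched=shuffle is available in
// assertion-enabled builds only; it is intended purely as a stress test that
// scheduling in an arbitrary legal order preserves correctness.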
3669 
3670 //===----------------------------------------------------------------------===//
3671 // GraphWriter support for ScheduleDAGMILive.
3672 //===----------------------------------------------------------------------===//
3673 
3674 #ifndef NDEBUG
3675 namespace llvm {
3676 
3677 template<> struct GraphTraits<
3678  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3679 
3680 template<>
3681 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3682  DOTGraphTraits(bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3683 
3684  static std::string getGraphName(const ScheduleDAG *G) {
3685  return G->MF.getName();
3686  }
3687 
3688  static bool renderGraphFromBottomUp() {
3689  return true;
3690  }
3691 
3692  static bool isNodeHidden(const SUnit *Node) {
3693  if (ViewMISchedCutoff == 0)
3694  return false;
3695  return (Node->Preds.size() > ViewMISchedCutoff
3696  || Node->Succs.size() > ViewMISchedCutoff);
3697  }
3698 
3699  /// If you want to override the dot attributes printed for a particular
3700  /// edge, override this method.
3701  static std::string getEdgeAttributes(const SUnit *Node,
3702  SUnitIterator EI,
3703  const ScheduleDAG *Graph) {
3704  if (EI.isArtificialDep())
3705  return "color=cyan,style=dashed";
3706  if (EI.isCtrlDep())
3707  return "color=blue,style=dashed";
3708  return "";
3709  }
3710 
3711  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3712  std::string Str;
3713  raw_string_ostream SS(Str);
3714  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3715  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3716  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3717  SS << "SU:" << SU->NodeNum;
3718  if (DFS)
3719  SS << " I:" << DFS->getNumInstrs(SU);
3720  return SS.str();
3721  }
3722 
3723  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3724  return G->getGraphNodeLabel(SU);
3725  }
3726 
3727  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3728  std::string Str("shape=Mrecord");
3729  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3730  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3731  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3732  if (DFS) {
3733  Str += ",style=filled,fillcolor=\"#";
3734  Str += DOT::getColorString(DFS->getSubtreeID(N));
3735  Str += '"';
3736  }
3737  return Str;
3738  }
3739 };
3740 
3741 } // end namespace llvm
3742 #endif // NDEBUG
3743 
3744 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3745 /// rendered using 'dot'.
3746 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3747 #ifndef NDEBUG
3748  ViewGraph(this, Name, false, Title);
3749 #else
3750  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3751  << "systems with Graphviz or gv!\n";
3752 #endif // NDEBUG
3753 }
3754 
3755 /// Out-of-line implementation with no arguments is handy for gdb.
3756 void ScheduleDAGMI::viewGraph() {
3757  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3758 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:118
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:77
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:484
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Segments::iterator iterator
Definition: LiveInterval.h:211
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:679
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:139
bool test(unsigned Idx) const
Definition: BitVector.h:501
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:449
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:724
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:366
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const RegPressureTracker & getBotRPTracker() const
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:269
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:215
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Result of a LiveRange query.
Definition: LiveInterval.h:89
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
PressureDiff & getPressureDiff(const SUnit *SU)
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:314
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:271
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:268
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:446
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:104
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:532
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:844
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:383
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Helpers for implementing custom MachineSchedStrategy classes.
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
static const unsigned MinSubtreeSize
cl::opt< bool > VerifyScheduling
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:393
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:499
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
constexpr double e
Definition: MathExtras.h:57
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
LiveInterval & getInterval(Register Reg)
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:519
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:270
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles)
Compute the next cycle at which the given processor resource unit can be scheduled.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:389
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::pair< unsigned, unsigned > getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:420
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:140
Representation of each machine instruction.
Definition: MachineInstr.h:64
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:652
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
iterator begin()
Definition: LiveInterval.h:214
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:831
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:376
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:509
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions before register allocation.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool getMemOperandWithOffset(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
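A hedged sketch of how such a query can feed a clustering decision (MIa, MIb, TII and TRI are assumed to be in scope; the offset threshold is illustrative, and the in-tree memop-clustering mutation applies additional limits):

// Treat two memory ops as cluster candidates only if they share a base
// operand and their offsets are close together.
const MachineOperand *BaseA = nullptr, *BaseB = nullptr;
int64_t OffA = 0, OffB = 0;
if (TII->getMemOperandWithOffset(MIa, BaseA, OffA, TRI) &&
    TII->getMemOperandWithOffset(MIb, BaseB, OffB, TRI) &&
    BaseA->isIdenticalTo(*BaseB)) {
  int64_t Dist = OffB > OffA ? OffB - OffA : OffA - OffB;
  if (Dist <= 64)
    ; // worth an artificial "cluster" edge between the two SUnits
}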
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Machine Instruction Scheduler
IRTranslator LLVM IR MI
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register getReg() const
getReg - Returns the register number.
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register pressure set over another.
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
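A typical caller is a target's createMachineScheduler hook; the sketch below assumes the surrounding TargetPassConfig boilerplate and uses the in-tree createLoadClusterDAGMutation helper, with createMyTargetSched as an illustrative name.

// Hypothetical target hook: build the default live-interval scheduler and
// bolt on a postprocessing step that clusters nearby loads.
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}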
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:655
void resize(size_type N)
Definition: SmallVector.h:344