MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/MachineScheduler.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph, provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 #else
104 static bool ViewMISchedDAGs = false;
105 #endif // NDEBUG
106 
107 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
108 /// size of the ready lists.
109 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
110  cl::desc("Limit ready list to N instructions"), cl::init(256));
111 
112 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
113  cl::desc("Enable register pressure scheduling."), cl::init(true));
114 
115 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
116  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
117 
118 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
119  cl::desc("Enable memop clustering."),
120  cl::init(true));
121 
122 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
123  cl::desc("Verify machine instrs before and after machine scheduling"));
124 
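// The options above are ordinary cl::opt flags, so they can be passed to llc
// or any tool that runs this pass. A hedged sketch of typical invocations
// (flag names are taken from the declarations above; "foo.ll" is a
// hypothetical input):
//
//   llc -misched-topdown -verify-misched foo.ll
//   llc -misched-limit=64 -misched-regpressure=false foo.ll
//   llc -view-misched-dags -misched-only-func=main foo.ll   (debug builds only)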
125 // DAG subtrees must have at least this many nodes.
126 static const unsigned MinSubtreeSize = 8;
127 
128 // Pin the vtables to this file.
129 void MachineSchedStrategy::anchor() {}
130 
131 void ScheduleDAGMutation::anchor() {}
132 
133 //===----------------------------------------------------------------------===//
134 // Machine Instruction Scheduling Pass and Registry
135 //===----------------------------------------------------------------------===//
136 
137 MachineSchedContext::MachineSchedContext() {
138  RegClassInfo = new RegisterClassInfo();
139 }
140 
141 MachineSchedContext::~MachineSchedContext() {
142  delete RegClassInfo;
143 }
144 
145 namespace {
146 
147 /// Base class for a machine scheduler class that can run at any point.
148 class MachineSchedulerBase : public MachineSchedContext,
149  public MachineFunctionPass {
150 public:
151  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
152 
153  void print(raw_ostream &O, const Module* = nullptr) const override;
154 
155 protected:
156  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
157 };
158 
159 /// MachineScheduler runs after coalescing and before register allocation.
160 class MachineScheduler : public MachineSchedulerBase {
161 public:
162  MachineScheduler();
163 
164  void getAnalysisUsage(AnalysisUsage &AU) const override;
165 
166  bool runOnMachineFunction(MachineFunction&) override;
167 
168  static char ID; // Class identification, replacement for typeinfo
169 
170 protected:
171  ScheduleDAGInstrs *createMachineScheduler();
172 };
173 
174 /// PostMachineScheduler runs shortly before code emission.
175 class PostMachineScheduler : public MachineSchedulerBase {
176 public:
177  PostMachineScheduler();
178 
179  void getAnalysisUsage(AnalysisUsage &AU) const override;
180 
181  bool runOnMachineFunction(MachineFunction&) override;
182 
183  static char ID; // Class identification, replacement for typeinfo
184 
185 protected:
186  ScheduleDAGInstrs *createPostMachineScheduler();
187 };
188 
189 } // end anonymous namespace
190 
191 char MachineScheduler::ID = 0;
192 
193 char &llvm::MachineSchedulerID = MachineScheduler::ID;
194 
195 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
196  "Machine Instruction Scheduler", false, false)
201 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
202  "Machine Instruction Scheduler", false, false)
203 
204 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
205  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
206 }
207 
208 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
209  AU.setPreservesCFG();
210  AU.addRequiredID(MachineDominatorsID);
211  AU.addRequired<MachineLoopInfo>();
212  AU.addRequired<AAResultsWrapperPass>();
213  AU.addRequired<TargetPassConfig>();
214  AU.addRequired<SlotIndexes>();
215  AU.addPreserved<SlotIndexes>();
216  AU.addRequired<LiveIntervals>();
217  AU.addPreserved<LiveIntervals>();
218  MachineFunctionPass::getAnalysisUsage(AU);
219 }
220 
221 char PostMachineScheduler::ID = 0;
222 
223 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
224 
225 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
226  "PostRA Machine Instruction Scheduler", false, false)
227 
228 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
229  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
230 }
231 
232 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
233  AU.setPreservesCFG();
234  AU.addRequiredID(MachineDominatorsID);
235  AU.addRequired<MachineLoopInfo>();
236  AU.addRequired<TargetPassConfig>();
237  MachineFunctionPass::getAnalysisUsage(AU);
238 }
239 
240 MachinePassRegistry MachineSchedRegistry::Registry;
241 
242 /// A dummy default scheduler factory indicates whether the scheduler
243 /// is overridden on the command line.
244 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
245  return nullptr;
246 }
247 
248 /// MachineSchedOpt allows command line selection of the scheduler.
249 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
250  RegisterPassParser<MachineSchedRegistry>>
251 MachineSchedOpt("misched",
252  cl::init(&useDefaultMachineSched), cl::Hidden,
253  cl::desc("Machine instruction scheduler to use"));
254 
255 static MachineSchedRegistry
256 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
257  useDefaultMachineSched);
258 
259 static cl::opt<bool> EnableMachineSched(
260  "enable-misched",
261  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
262  cl::Hidden);
263 
264 static cl::opt<bool> EnablePostRAMachineSched(
265  "enable-post-misched",
266  cl::desc("Enable the post-ra machine instruction scheduling pass."),
267  cl::init(true), cl::Hidden);
268 
269 /// Decrement this iterator until reaching the top or a non-debug instr.
270 static MachineBasicBlock::const_iterator
271 priorNonDebug(MachineBasicBlock::const_iterator I,
272  MachineBasicBlock::const_iterator Beg) {
273  assert(I != Beg && "reached the top of the region, cannot decrement");
274  while (--I != Beg) {
275  if (!I->isDebugInstr())
276  break;
277  }
278  return I;
279 }
280 
281 /// Non-const version.
282 static MachineBasicBlock::iterator
283 priorNonDebug(MachineBasicBlock::iterator I,
284  MachineBasicBlock::iterator Beg) {
285  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
286  .getNonConstIterator();
287 }
288 
289 /// If this iterator is a debug value, increment until reaching the End or a
290 /// non-debug instruction.
291 static MachineBasicBlock::const_iterator
292 nextIfDebug(MachineBasicBlock::const_iterator I,
293  MachineBasicBlock::const_iterator End) {
294  for(; I != End; ++I) {
295  if (!I->isDebugInstr())
296  break;
297  }
298  return I;
299 }
300 
301 /// Non-const version.
302 static MachineBasicBlock::iterator
303 nextIfDebug(MachineBasicBlock::iterator I,
304  MachineBasicBlock::iterator End) {
305  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
306  .getNonConstIterator();
307 }
308 
309 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
310 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
311  // Select the scheduler, or set the default.
312  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
313  if (Ctor != useDefaultMachineSched)
314  return Ctor(this);
315 
316  // Get the default scheduler set by the target for this function.
317  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
318  if (Scheduler)
319  return Scheduler;
320 
321  // Default to GenericScheduler.
322  return createGenericSchedLive(this);
323 }
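// A sketch of how an out-of-tree or experimental scheduler plugs into the
// "misched" option above ("MySched"/"my-sched" are hypothetical names, not
// part of this file); the registry entry makes `llc -misched=my-sched`
// resolve to the factory instead of useDefaultMachineSched:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return createGenericSchedLive(C); // or any custom ScheduleDAGInstrs
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Experimental scheduler", createMySched);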
324 
325 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
326 /// the caller. We don't have a command line option to override the postRA
327 /// scheduler. The Target must configure it.
328 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
329  // Get the postRA scheduler set by the target for this function.
330  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
331  if (Scheduler)
332  return Scheduler;
333 
334  // Default to GenericScheduler.
335  return createGenericSchedPostRA(this);
336 }
337 
338 /// Top-level MachineScheduler pass driver.
339 ///
340 /// Visit blocks in function order. Divide each block into scheduling regions
341 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
342 /// consistent with the DAG builder, which traverses the interior of the
343 /// scheduling regions bottom-up.
344 ///
345 /// This design avoids exposing scheduling boundaries to the DAG builder,
346 /// simplifying the DAG builder's support for "special" target instructions.
347 /// At the same time the design allows target schedulers to operate across
348 /// scheduling boundaries, for example to bundle the boundary instructions
349 /// without reordering them. This creates complexity, because the target
350 /// scheduler must update the RegionBegin and RegionEnd positions cached by
351 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
352 /// design would be to split blocks at scheduling boundaries, but LLVM has a
353 /// general bias against block splitting purely for implementation simplicity.
354 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
355  if (skipFunction(mf.getFunction()))
356  return false;
357 
358  if (EnableMachineSched.getNumOccurrences()) {
359  if (!EnableMachineSched)
360  return false;
361  } else if (!mf.getSubtarget().enableMachineScheduler())
362  return false;
363 
364  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
365 
366  // Initialize the context of the pass.
367  MF = &mf;
368  MLI = &getAnalysis<MachineLoopInfo>();
369  MDT = &getAnalysis<MachineDominatorTree>();
370  PassConfig = &getAnalysis<TargetPassConfig>();
371  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
372 
373  LIS = &getAnalysis<LiveIntervals>();
374 
375  if (VerifyScheduling) {
376  LLVM_DEBUG(LIS->dump());
377  MF->verify(this, "Before machine scheduling.");
378  }
379  RegClassInfo->runOnMachineFunction(*MF);
380 
381  // Instantiate the selected scheduler for this target, function, and
382  // optimization level.
383  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
384  scheduleRegions(*Scheduler, false);
385 
386  LLVM_DEBUG(LIS->dump());
387  if (VerifyScheduling)
388  MF->verify(this, "After machine scheduling.");
389  return true;
390 }
391 
392 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
393  if (skipFunction(mf.getFunction()))
394  return false;
395 
396  if (EnablePostRAMachineSched.getNumOccurrences()) {
397  if (!EnablePostRAMachineSched)
398  return false;
399  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
400  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
401  return false;
402  }
403  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
404 
405  // Initialize the context of the pass.
406  MF = &mf;
407  MLI = &getAnalysis<MachineLoopInfo>();
408  PassConfig = &getAnalysis<TargetPassConfig>();
409 
410  if (VerifyScheduling)
411  MF->verify(this, "Before post machine scheduling.");
412 
413  // Instantiate the selected scheduler for this target, function, and
414  // optimization level.
415  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
416  scheduleRegions(*Scheduler, true);
417 
418  if (VerifyScheduling)
419  MF->verify(this, "After post machine scheduling.");
420  return true;
421 }
422 
423 /// Return true if the given instruction should not be included in a scheduling
424 /// region.
425 ///
426 /// MachineScheduler does not currently support scheduling across calls. To
427 /// handle calls, the DAG builder needs to be modified to create register
428 /// anti/output dependencies on the registers clobbered by the call's regmask
429 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
430 /// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
431 /// the boundary, but there would be no benefit to postRA scheduling across
432 /// calls this late anyway.
433 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
434  MachineBasicBlock *MBB,
435  MachineFunction *MF,
436  const TargetInstrInfo *TII) {
437  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
438 }
439 
440 /// A region of an MBB for scheduling.
441 namespace {
442 struct SchedRegion {
443  /// RegionBegin is the first instruction in the scheduling region, and
444  /// RegionEnd is either MBB->end() or the scheduling boundary after the
445  /// last instruction in the scheduling region. These iterators cannot refer
446  /// to instructions outside of the identified scheduling region because
447  /// those may be reordered before scheduling this region.
448  MachineBasicBlock::iterator RegionBegin;
449  MachineBasicBlock::iterator RegionEnd;
450  unsigned NumRegionInstrs;
451 
452  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
453  unsigned N) :
454  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
455 };
456 } // end anonymous namespace
457 
458 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
459 
460 static void
461 getSchedRegions(MachineBasicBlock *MBB,
462  MBBRegionsVector &Regions,
463  bool RegionsTopDown) {
464  MachineFunction *MF = MBB->getParent();
465  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
466 
467  MachineBasicBlock::iterator I = nullptr;
468  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
469  RegionEnd != MBB->begin(); RegionEnd = I) {
470 
471  // Avoid decrementing RegionEnd for blocks with no terminator.
472  if (RegionEnd != MBB->end() ||
473  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
474  --RegionEnd;
475  }
476 
477  // The next region starts above the previous region. Look backward in the
478  // instruction stream until we find the nearest boundary.
479  unsigned NumRegionInstrs = 0;
480  I = RegionEnd;
481  for (;I != MBB->begin(); --I) {
482  MachineInstr &MI = *std::prev(I);
483  if (isSchedBoundary(&MI, &*MBB, MF, TII))
484  break;
485  if (!MI.isDebugInstr())
486  // MBB::size() uses instr_iterator to count. Here we need a bundle to
487  // count as a single instruction.
488  ++NumRegionInstrs;
489  }
490 
491  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
492  }
493 
494  if (RegionsTopDown)
495  std::reverse(Regions.begin(), Regions.end());
496 }
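// For illustration (hypothetical instructions, not from this file): a block
// containing a call is split around it, since isSchedBoundary() returns true
// for calls and target-specific boundaries:
//
//   %1 = LOAD ...          \
//   %2 = ADD %1, ...        | region 1
//   CALL @fn               <- boundary; belongs to no region's DAG
//   %3 = MUL %2, ...        \
//   STORE %3, ...           | region 2
//
// getSchedRegions() collects the regions bottom-up (region 2 first) and
// reverses the vector when the scheduler asks for top-down region order.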
497 
498 /// Main driver for both MachineScheduler and PostMachineScheduler.
499 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
500  bool FixKillFlags) {
501  // Visit all machine basic blocks.
502  //
503  // TODO: Visit blocks in global postorder or postorder within the bottom-up
504  // loop tree. Then we can optionally compute global RegPressure.
505  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
506  MBB != MBBEnd; ++MBB) {
507 
508  Scheduler.startBlock(&*MBB);
509 
510 #ifndef NDEBUG
511  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
512  continue;
513  if (SchedOnlyBlock.getNumOccurrences()
514  && (int)SchedOnlyBlock != MBB->getNumber())
515  continue;
516 #endif
517 
518  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
519  // points to the scheduling boundary at the bottom of the region. The DAG
520  // does not include RegionEnd, but the region does (i.e. the next
521  // RegionEnd is above the previous RegionBegin). If the current block has
522  // no terminator then RegionEnd == MBB->end() for the bottom region.
523  //
524  // All the regions of MBB are first found and stored in MBBRegions, which
525  // are then processed top-down within MBB if doMBBSchedRegionsTopDown() is true.
526  //
527  // The Scheduler may insert instructions during either schedule() or
528  // exitRegion(), even for empty regions. So the local iterators 'I' and
529  // 'RegionEnd' are invalid across these calls. Instructions must not be
530  // added to other regions than the current one without updating MBBRegions.
531 
532  MBBRegionsVector MBBRegions;
533  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
534  for (MBBRegionsVector::iterator R = MBBRegions.begin();
535  R != MBBRegions.end(); ++R) {
536  MachineBasicBlock::iterator I = R->RegionBegin;
537  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
538  unsigned NumRegionInstrs = R->NumRegionInstrs;
539 
540  // Notify the scheduler of the region, even if we may skip scheduling
541  // it. Perhaps it still needs to be bundled.
542  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
543 
544  // Skip empty scheduling regions (0 or 1 schedulable instructions).
545  if (I == RegionEnd || I == std::prev(RegionEnd)) {
546  // Close the current region. Bundle the terminator if needed.
547  // This invalidates 'RegionEnd' and 'I'.
548  Scheduler.exitRegion();
549  continue;
550  }
551  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
552  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
553  << " " << MBB->getName() << "\n From: " << *I
554  << " To: ";
555  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
556  else dbgs() << "End";
557  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
558  if (DumpCriticalPathLength) {
559  errs() << MF->getName();
560  errs() << ":%bb. " << MBB->getNumber();
561  errs() << " " << MBB->getName() << " \n";
562  }
563 
564  // Schedule a region: possibly reorder instructions.
565  // This invalidates the original region iterators.
566  Scheduler.schedule();
567 
568  // Close the current region.
569  Scheduler.exitRegion();
570  }
571  Scheduler.finishBlock();
572  // FIXME: Ideally, no further passes should rely on kill flags. However,
573  // thumb2 size reduction is currently an exception, so the PostMIScheduler
574  // needs to do this.
575  if (FixKillFlags)
576  Scheduler.fixupKills(*MBB);
577  }
578  Scheduler.finalizeSchedule();
579 }
580 
581 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
582  // unimplemented
583 }
584 
585 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
586 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
587  dbgs() << "Queue " << Name << ": ";
588  for (const SUnit *SU : Queue)
589  dbgs() << SU->NodeNum << " ";
590  dbgs() << "\n";
591 }
592 #endif
593 
594 //===----------------------------------------------------------------------===//
595 // ScheduleDAGMI - Basic machine instruction scheduling. This is
596 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
597 // virtual registers.
598 //===----------------------------------------------------------------------===//
599 
600 // Provide a vtable anchor.
601 ScheduleDAGMI::~ScheduleDAGMI() = default;
602 
603 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
604  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
605 }
606 
607 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
608  if (SuccSU != &ExitSU) {
609  // Do not use WillCreateCycle, it assumes SD scheduling.
610  // If Pred is reachable from Succ, then the edge creates a cycle.
611  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
612  return false;
613  Topo.AddPred(SuccSU, PredDep.getSUnit());
614  }
615  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
616  // Return true regardless of whether a new edge needed to be inserted.
617  return true;
618 }
619 
620 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
621 /// NumPredsLeft reaches zero, release the successor node.
622 ///
623 /// FIXME: Adjust SuccSU height based on MinLatency.
624 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
625  SUnit *SuccSU = SuccEdge->getSUnit();
626 
627  if (SuccEdge->isWeak()) {
628  --SuccSU->WeakPredsLeft;
629  if (SuccEdge->isCluster())
630  NextClusterSucc = SuccSU;
631  return;
632  }
633 #ifndef NDEBUG
634  if (SuccSU->NumPredsLeft == 0) {
635  dbgs() << "*** Scheduling failed! ***\n";
636  SuccSU->dump(this);
637  dbgs() << " has been released too many times!\n";
638  llvm_unreachable(nullptr);
639  }
640 #endif
641  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
642  // CurrCycle may have advanced since then.
643  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
644  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
645 
646  --SuccSU->NumPredsLeft;
647  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
648  SchedImpl->releaseTopNode(SuccSU);
649 }
650 
651 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
652 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
653  for (SDep &Succ : SU->Succs)
654  releaseSucc(SU, &Succ);
655 }
656 
657 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
658 /// NumSuccsLeft reaches zero, release the predecessor node.
659 ///
660 /// FIXME: Adjust PredSU height based on MinLatency.
661 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
662  SUnit *PredSU = PredEdge->getSUnit();
663 
664  if (PredEdge->isWeak()) {
665  --PredSU->WeakSuccsLeft;
666  if (PredEdge->isCluster())
667  NextClusterPred = PredSU;
668  return;
669  }
670 #ifndef NDEBUG
671  if (PredSU->NumSuccsLeft == 0) {
672  dbgs() << "*** Scheduling failed! ***\n";
673  PredSU->dump(this);
674  dbgs() << " has been released too many times!\n";
675  llvm_unreachable(nullptr);
676  }
677 #endif
678  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
679  // CurrCycle may have advanced since then.
680  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
681  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
682 
683  --PredSU->NumSuccsLeft;
684  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
685  SchedImpl->releaseBottomNode(PredSU);
686 }
687 
688 /// releasePredecessors - Call releasePred on each of SU's predecessors.
689 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
690  for (SDep &Pred : SU->Preds)
691  releasePred(SU, &Pred);
692 }
693 
694 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
695  ScheduleDAGInstrs::startBlock(bb);
696  SchedImpl->enterMBB(bb);
697 }
698 
699 void ScheduleDAGMI::finishBlock() {
700  SchedImpl->leaveMBB();
701  ScheduleDAGInstrs::finishBlock();
702 }
703 
704 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
705 /// crossing a scheduling boundary. [begin, end) includes all instructions in
706 /// the region, including the boundary itself and single-instruction regions
707 /// that don't get scheduled.
708 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
709  MachineBasicBlock::iterator begin,
710  MachineBasicBlock::iterator end,
711  unsigned regioninstrs)
712 {
713  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
714 
715  SchedImpl->initPolicy(begin, end, regioninstrs);
716 }
717 
718 /// This is normally called from the main scheduler loop but may also be invoked
719 /// by the scheduling strategy to perform additional code motion.
720 void ScheduleDAGMI::moveInstruction(
721  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
722  // Advance RegionBegin if the first instruction moves down.
723  if (&*RegionBegin == MI)
724  ++RegionBegin;
725 
726  // Update the instruction stream.
727  BB->splice(InsertPos, BB, MI);
728 
729  // Update LiveIntervals
730  if (LIS)
731  LIS->handleMove(*MI, /*UpdateFlags=*/true);
732 
733  // Recede RegionBegin if an instruction moves above the first.
734  if (RegionBegin == InsertPos)
735  RegionBegin = MI;
736 }
737 
738 bool ScheduleDAGMI::checkSchedLimit() {
739 #ifndef NDEBUG
740  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
741  CurrentTop = CurrentBottom;
742  return false;
743  }
744  ++NumInstrsScheduled;
745 #endif
746  return true;
747 }
748 
749 /// Per-region scheduling driver, called back from
750 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
751 /// does not consider liveness or register pressure. It is useful for PostRA
752 /// scheduling and potentially other custom schedulers.
753 void ScheduleDAGMI::schedule() {
754  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
755  LLVM_DEBUG(SchedImpl->dumpPolicy());
756 
757  // Build the DAG.
758  buildSchedGraph(AA);
759 
760  Topo.InitDAGTopologicalSorting();
761 
762  postprocessDAG();
763 
764  SmallVector<SUnit*, 8> TopRoots, BotRoots;
765  findRootsAndBiasEdges(TopRoots, BotRoots);
766 
767  LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
768  for (const SUnit &SU
769  : SUnits) SU.dumpAll(this);
770  if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
771  if (ViewMISchedDAGs) viewGraph();
772 
773  // Initialize the strategy before modifying the DAG.
774  // This may initialize a DFSResult to be used for queue priority.
775  SchedImpl->initialize(this);
776 
777  // Initialize ready queues now that the DAG and priority data are finalized.
778  initQueues(TopRoots, BotRoots);
779 
780  bool IsTopNode = false;
781  while (true) {
782  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
783  SUnit *SU = SchedImpl->pickNode(IsTopNode);
784  if (!SU) break;
785 
786  assert(!SU->isScheduled && "Node already scheduled");
787  if (!checkSchedLimit())
788  break;
789 
790  MachineInstr *MI = SU->getInstr();
791  if (IsTopNode) {
792  assert(SU->isTopReady() && "node still has unscheduled dependencies");
793  if (&*CurrentTop == MI)
794  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
795  else
796  moveInstruction(MI, CurrentTop);
797  } else {
798  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
799  MachineBasicBlock::iterator priorII =
800  priorNonDebug(CurrentBottom, CurrentTop);
801  if (&*priorII == MI)
802  CurrentBottom = priorII;
803  else {
804  if (&*CurrentTop == MI)
805  CurrentTop = nextIfDebug(++CurrentTop, priorII);
806  moveInstruction(MI, CurrentBottom);
807  CurrentBottom = MI;
808  }
809  }
810  // Notify the scheduling strategy before updating the DAG.
811  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
812  // runs, it can then use the accurate ReadyCycle time to determine whether
813  // newly released nodes can move to the readyQ.
814  SchedImpl->schedNode(SU, IsTopNode);
815 
816  updateQueues(SU, IsTopNode);
817  }
818  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
819 
820  placeDebugValues();
821 
822  LLVM_DEBUG({
823  dbgs() << "*** Final schedule for "
824  << printMBBReference(*begin()->getParent()) << " ***\n";
825  dumpSchedule();
826  dbgs() << '\n';
827  });
828 }
829 
830 /// Apply each ScheduleDAGMutation step in order.
831 void ScheduleDAGMI::postprocessDAG() {
832  for (auto &m : Mutations)
833  m->apply(this);
834 }
835 
836 void ScheduleDAGMI::
837 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
838  SmallVectorImpl<SUnit*> &BotRoots) {
839  for (SUnit &SU : SUnits) {
840  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
841 
842  // Order predecessors so DFSResult follows the critical path.
843  SU.biasCriticalPath();
844 
845  // A SUnit is ready to top schedule if it has no predecessors.
846  if (!SU.NumPredsLeft)
847  TopRoots.push_back(&SU);
848  // A SUnit is ready to bottom schedule if it has no successors.
849  if (!SU.NumSuccsLeft)
850  BotRoots.push_back(&SU);
851  }
852  ExitSU.biasCriticalPath();
853 }
854 
855 /// Identify DAG roots and set up scheduler queues.
856 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
857  ArrayRef<SUnit*> BotRoots) {
858  NextClusterSucc = nullptr;
859  NextClusterPred = nullptr;
860 
861  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
862  //
863  // Nodes with unreleased weak edges can still be roots.
864  // Release top roots in forward order.
865  for (SUnit *SU : TopRoots)
866  SchedImpl->releaseTopNode(SU);
867 
868  // Release bottom roots in reverse order so the higher priority nodes appear
869  // first. This is more natural and slightly more efficient.
870  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
871  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
872  SchedImpl->releaseBottomNode(*I);
873  }
874 
875  releaseSuccessors(&EntrySU);
876  releasePredecessors(&ExitSU);
877 
878  SchedImpl->registerRoots();
879 
880  // Advance past initial DebugValues.
881  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
882  CurrentBottom = RegionEnd;
883 }
884 
885 /// Update scheduler queues after scheduling an instruction.
886 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
887  // Release dependent instructions for scheduling.
888  if (IsTopNode)
889  releaseSuccessors(SU);
890  else
891  releasePredecessors(SU);
892 
893  SU->isScheduled = true;
894 }
895 
896 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
897 void ScheduleDAGMI::placeDebugValues() {
898  // If first instruction was a DBG_VALUE then put it back.
899  if (FirstDbgValue) {
900  BB->splice(RegionBegin, BB, FirstDbgValue);
901  RegionBegin = FirstDbgValue;
902  }
903 
904  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
905  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
906  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
907  MachineInstr *DbgValue = P.first;
908  MachineBasicBlock::iterator OrigPrevMI = P.second;
909  if (&*RegionBegin == DbgValue)
910  ++RegionBegin;
911  BB->splice(++OrigPrevMI, BB, DbgValue);
912  if (OrigPrevMI == std::prev(RegionEnd))
913  RegionEnd = DbgValue;
914  }
915  DbgValues.clear();
916  FirstDbgValue = nullptr;
917 }
918 
919 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
920 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
921  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
922  if (SUnit *SU = getSUnit(&(*MI)))
923  SU->dump(this);
924  else
925  dbgs() << "Missing SUnit\n";
926  }
927 }
928 #endif
929 
930 //===----------------------------------------------------------------------===//
931 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
932 // preservation.
933 //===----------------------------------------------------------------------===//
934 
935 ScheduleDAGMILive::~ScheduleDAGMILive() {
936  delete DFSResult;
937 }
938 
939 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
940  const MachineInstr &MI = *SU.getInstr();
941  for (const MachineOperand &MO : MI.operands()) {
942  if (!MO.isReg())
943  continue;
944  if (!MO.readsReg())
945  continue;
946  if (TrackLaneMasks && !MO.isUse())
947  continue;
948 
949  unsigned Reg = MO.getReg();
950  if (!TargetRegisterInfo::isVirtualRegister(Reg))
951  continue;
952 
953  // Ignore re-defs.
954  if (TrackLaneMasks) {
955  bool FoundDef = false;
956  for (const MachineOperand &MO2 : MI.operands()) {
957  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
958  FoundDef = true;
959  break;
960  }
961  }
962  if (FoundDef)
963  continue;
964  }
965 
966  // Record this local VReg use.
967  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
968  for (; UI != VRegUses.end(); ++UI) {
969  if (UI->SU == &SU)
970  break;
971  }
972  if (UI == VRegUses.end())
973  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
974  }
975 }
976 
977 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
978 /// crossing a scheduling boundary. [begin, end) includes all instructions in
979 /// the region, including the boundary itself and single-instruction regions
980 /// that don't get scheduled.
981 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
982  MachineBasicBlock::iterator begin,
983  MachineBasicBlock::iterator end,
984  unsigned regioninstrs)
985 {
986  // ScheduleDAGMI initializes SchedImpl's per-region policy.
987  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
988 
989  // For convenience remember the end of the liveness region.
990  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
991 
992  SUPressureDiffs.clear();
993 
994  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
995  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
996 
997  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
998  "ShouldTrackLaneMasks requires ShouldTrackPressure");
999 }
1000 
1001 // Set up the register pressure trackers for the top scheduled and the bottom
1002 // scheduled regions.
1003 void ScheduleDAGMILive::initRegPressure() {
1004  VRegUses.clear();
1005  VRegUses.setUniverse(MRI.getNumVirtRegs());
1006  for (SUnit &SU : SUnits)
1007  collectVRegUses(SU);
1008 
1009  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1010  ShouldTrackLaneMasks, false);
1011  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1012  ShouldTrackLaneMasks, false);
1013 
1014  // Close the RPTracker to finalize live ins.
1015  RPTracker.closeRegion();
1016 
1017  LLVM_DEBUG(RPTracker.dump());
1018 
1019  // Initialize the live ins and live outs.
1020  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1021  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1022 
1023  // Close one end of the tracker so we can call
1024  // getMaxUpward/DownwardPressureDelta before advancing across any
1025  // instructions. This converts currently live regs into live ins/outs.
1026  TopRPTracker.closeTop();
1027  BotRPTracker.closeBottom();
1028 
1029  BotRPTracker.initLiveThru(RPTracker);
1030  if (!BotRPTracker.getLiveThru().empty()) {
1031  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1032  LLVM_DEBUG(dbgs() << "Live Thru: ";
1033  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1034  };
1035 
1036  // For each live out vreg reduce the pressure change associated with other
1037  // uses of the same vreg below the live-out reaching def.
1038  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1039 
1040  // Account for liveness generated by the region boundary.
1041  if (LiveRegionEnd != RegionEnd) {
1042  SmallVector<RegisterMaskPair, 8> LiveUses;
1043  BotRPTracker.recede(&LiveUses);
1044  updatePressureDiffs(LiveUses);
1045  }
1046 
1047  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1048  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1049  dbgs() << "Bottom Pressure:\n";
1050  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1051 
1052  assert((BotRPTracker.getPos() == RegionEnd ||
1053  (RegionEnd->isDebugInstr() &&
1054  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1055  "Can't find the region bottom");
1056 
1057  // Cache the list of excess pressure sets in this region. This will also track
1058  // the max pressure in the scheduled code for these sets.
1059  RegionCriticalPSets.clear();
1060  const std::vector<unsigned> &RegionPressure =
1061  RPTracker.getPressure().MaxSetPressure;
1062  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1063  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1064  if (RegionPressure[i] > Limit) {
1065  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1066  << " Actual " << RegionPressure[i] << "\n");
1067  RegionCriticalPSets.push_back(PressureChange(i));
1068  }
1069  }
1070  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1071  for (const PressureChange &RCPS
1072  : RegionCriticalPSets) dbgs()
1073  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1074  dbgs() << "\n");
1075 }
1076 
1077 void ScheduleDAGMILive::
1078 updateScheduledPressure(const SUnit *SU,
1079  const std::vector<unsigned> &NewMaxPressure) {
1080  const PressureDiff &PDiff = getPressureDiff(SU);
1081  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1082  for (const PressureChange &PC : PDiff) {
1083  if (!PC.isValid())
1084  break;
1085  unsigned ID = PC.getPSet();
1086  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1087  ++CritIdx;
1088  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1089  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1090  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1091  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1092  }
1093  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1094  if (NewMaxPressure[ID] >= Limit - 2) {
1095  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1096  << NewMaxPressure[ID]
1097  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1098  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1099  << " livethru)\n");
1100  }
1101  }
1102 }
1103 
1104 /// Update the PressureDiff array for liveness after scheduling this
1105 /// instruction.
1106 void ScheduleDAGMILive::updatePressureDiffs(
1107  ArrayRef<RegisterMaskPair> LiveUses) {
1108  for (const RegisterMaskPair &P : LiveUses) {
1109  unsigned Reg = P.RegUnit;
1110  /// FIXME: Currently assuming single-use physregs.
1111  if (!TRI->isVirtualRegister(Reg))
1112  continue;
1113 
1114  if (ShouldTrackLaneMasks) {
1115  // If the register has just become live then other uses won't change
1116  // this fact anymore => decrement pressure.
1117  // If the register has just become dead then other uses make it come
1118  // back to life => increment pressure.
1119  bool Decrement = P.LaneMask.any();
1120 
1121  for (const VReg2SUnit &V2SU
1122  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1123  SUnit &SU = *V2SU.SU;
1124  if (SU.isScheduled || &SU == &ExitSU)
1125  continue;
1126 
1127  PressureDiff &PDiff = getPressureDiff(&SU);
1128  PDiff.addPressureChange(Reg, Decrement, &MRI);
1129  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1130  << printReg(Reg, TRI) << ':'
1131  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1132  dbgs() << " to "; PDiff.dump(*TRI););
1133  }
1134  } else {
1135  assert(P.LaneMask.any());
1136  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1137  // This may be called before CurrentBottom has been initialized. However,
1138  // BotRPTracker must have a valid position. We want the value live into the
1139  // instruction or live out of the block, so ask for the previous
1140  // instruction's live-out.
1141  const LiveInterval &LI = LIS->getInterval(Reg);
1142  VNInfo *VNI;
1143  MachineBasicBlock::const_iterator I =
1144  nextIfDebug(BotRPTracker.getPos(), BB->end());
1145  if (I == BB->end())
1146  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1147  else {
1148  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1149  VNI = LRQ.valueIn();
1150  }
1151  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1152  assert(VNI && "No live value at use.");
1153  for (const VReg2SUnit &V2SU
1154  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1155  SUnit *SU = V2SU.SU;
1156  // If this use comes before the reaching def, it cannot be a last use,
1157  // so decrease its pressure change.
1158  if (!SU->isScheduled && SU != &ExitSU) {
1159  LiveQueryResult LRQ =
1160  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1161  if (LRQ.valueIn() == VNI) {
1162  PressureDiff &PDiff = getPressureDiff(SU);
1163  PDiff.addPressureChange(Reg, true, &MRI);
1164  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1165  << *SU->getInstr();
1166  dbgs() << " to "; PDiff.dump(*TRI););
1167  }
1168  }
1169  }
1170  }
1171  }
1172 }
1173 
1174 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1175 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1176 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1177 ///
1178 /// This is a skeletal driver, with all the functionality pushed into helpers,
1179 /// so that it can be easily extended by experimental schedulers. Generally,
1180 /// implementing MachineSchedStrategy should be sufficient to implement a new
1181 /// scheduling algorithm. However, if a scheduler further subclasses
1182 /// ScheduleDAGMILive then it will want to override this virtual method in order
1183 /// to update any specialized state.
1184 void ScheduleDAGMILive::schedule() {
1185  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1186  LLVM_DEBUG(SchedImpl->dumpPolicy());
1187  buildDAGWithRegPressure();
1188 
1189  Topo.InitDAGTopologicalSorting();
1190 
1191  postprocessDAG();
1192 
1193  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1194  findRootsAndBiasEdges(TopRoots, BotRoots);
1195 
1196  // Initialize the strategy before modifying the DAG.
1197  // This may initialize a DFSResult to be used for queue priority.
1198  SchedImpl->initialize(this);
1199 
1200  LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
1201  for (const SUnit &SU
1202  : SUnits) {
1203  SU.dumpAll(this);
1204  if (ShouldTrackPressure) {
1205  dbgs() << " Pressure Diff : ";
1206  getPressureDiff(&SU).dump(*TRI);
1207  }
1208  dbgs() << " Single Issue : ";
1209  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1210  SchedModel.mustEndGroup(SU.getInstr()))
1211  dbgs() << "true;";
1212  else
1213  dbgs() << "false;";
1214  dbgs() << '\n';
1215  } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
1216  if (ViewMISchedDAGs) viewGraph();
1217 
1218  // Initialize ready queues now that the DAG and priority data are finalized.
1219  initQueues(TopRoots, BotRoots);
1220 
1221  bool IsTopNode = false;
1222  while (true) {
1223  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1224  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1225  if (!SU) break;
1226 
1227  assert(!SU->isScheduled && "Node already scheduled");
1228  if (!checkSchedLimit())
1229  break;
1230 
1231  scheduleMI(SU, IsTopNode);
1232 
1233  if (DFSResult) {
1234  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1235  if (!ScheduledTrees.test(SubtreeID)) {
1236  ScheduledTrees.set(SubtreeID);
1237  DFSResult->scheduleTree(SubtreeID);
1238  SchedImpl->scheduleTree(SubtreeID);
1239  }
1240  }
1241 
1242  // Notify the scheduling strategy after updating the DAG.
1243  SchedImpl->schedNode(SU, IsTopNode);
1244 
1245  updateQueues(SU, IsTopNode);
1246  }
1247  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1248 
1249  placeDebugValues();
1250 
1251  LLVM_DEBUG({
1252  dbgs() << "*** Final schedule for "
1253  << printMBBReference(*begin()->getParent()) << " ***\n";
1254  dumpSchedule();
1255  dbgs() << '\n';
1256  });
1257 }
1258 
1259 /// Build the DAG and set up three register pressure trackers.
1260 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1261  if (!ShouldTrackPressure) {
1262  RPTracker.reset();
1263  RegionCriticalPSets.clear();
1264  buildSchedGraph(AA);
1265  return;
1266  }
1267 
1268  // Initialize the register pressure tracker used by buildSchedGraph.
1269  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1270  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1271 
1272  // Account for liveness generated by the region boundary.
1273  if (LiveRegionEnd != RegionEnd)
1274  RPTracker.recede();
1275 
1276  // Build the DAG, and compute current register pressure.
1277  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1278 
1279  // Initialize top/bottom trackers after computing region pressure.
1280  initRegPressure();
1281 }
1282 
1283 void ScheduleDAGMILive::computeDFSResult() {
1284  if (!DFSResult)
1285  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1286  DFSResult->clear();
1287  ScheduledTrees.clear();
1288  DFSResult->resize(SUnits.size());
1289  DFSResult->compute(SUnits);
1290  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1291 }
1292 
1293 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1294 /// only provides the critical path for single block loops. To handle loops that
1295 /// span blocks, we could use the vreg path latencies provided by
1296 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1297 /// available for use in the scheduler.
1298 ///
1299 /// The cyclic path estimation identifies a def-use pair that crosses the back
1300 /// edge and considers the depth and height of the nodes. For example, consider
1301 /// the following instruction sequence where each instruction has unit latency
1302 /// and defines an eponymous virtual register:
1303 ///
1304 /// a->b(a,c)->c(b)->d(c)->exit
1305 ///
1306 /// The cyclic critical path is two cycles: b->c->b
1307 /// The acyclic critical path is four cycles: a->b->c->d->exit
1308 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1309 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1310 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1311 /// LiveInDepth = depth(b) = len(a->b) = 1
1312 ///
1313 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1314 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1315 /// CyclicCriticalPath = min(2, 2) = 2
1316 ///
1317 /// This could be relevant to PostRA scheduling, but is currently implemented
1318 /// assuming LiveIntervals.
1319 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1320  // This only applies to single-block loops.
1321  if (!BB->isSuccessor(BB))
1322  return 0;
1323 
1324  unsigned MaxCyclicLatency = 0;
1325  // Visit each live out vreg def to find def/use pairs that cross iterations.
1326  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1327  unsigned Reg = P.RegUnit;
1328  if (!TRI->isVirtualRegister(Reg))
1329  continue;
1330  const LiveInterval &LI = LIS->getInterval(Reg);
1331  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1332  if (!DefVNI)
1333  continue;
1334 
1335  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1336  const SUnit *DefSU = getSUnit(DefMI);
1337  if (!DefSU)
1338  continue;
1339 
1340  unsigned LiveOutHeight = DefSU->getHeight();
1341  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1342  // Visit all local users of the vreg def.
1343  for (const VReg2SUnit &V2SU
1344  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1345  SUnit *SU = V2SU.SU;
1346  if (SU == &ExitSU)
1347  continue;
1348 
1349  // Only consider uses of the phi.
1350  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1351  if (!LRQ.valueIn()->isPHIDef())
1352  continue;
1353 
1354  // Assume that a path spanning two iterations is a cycle, which could
1355  // overestimate in strange cases. This allows cyclic latency to be
1356  // estimated as the minimum slack of the vreg's depth or height.
1357  unsigned CyclicLatency = 0;
1358  if (LiveOutDepth > SU->getDepth())
1359  CyclicLatency = LiveOutDepth - SU->getDepth();
1360 
1361  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1362  if (LiveInHeight > LiveOutHeight) {
1363  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1364  CyclicLatency = LiveInHeight - LiveOutHeight;
1365  } else
1366  CyclicLatency = 0;
1367 
1368  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1369  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1370  if (CyclicLatency > MaxCyclicLatency)
1371  MaxCyclicLatency = CyclicLatency;
1372  }
1373  }
1374  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1375  return MaxCyclicLatency;
1376 }
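// Illustrative only (not part of this pass): the per-use arithmetic from the
// doc comment above, isolated as a standalone helper. Given the depth/height
// of the live-out def (DefSU) and of one in-loop use, plus the def's latency,
// it returns min(depth slack, height slack), mirroring the inner loop of
// computeCyclicCriticalPath().
static unsigned cyclicLatencyForUse(unsigned LiveOutDepth, unsigned LiveOutHeight,
                                    unsigned UseDepth, unsigned UseHeight,
                                    unsigned DefLatency) {
  unsigned CyclicLatency = 0;
  if (LiveOutDepth > UseDepth)
    CyclicLatency = LiveOutDepth - UseDepth;          // depth slack
  unsigned LiveInHeight = UseHeight + DefLatency;
  if (LiveInHeight > LiveOutHeight) {
    if (LiveInHeight - LiveOutHeight < CyclicLatency)
      CyclicLatency = LiveInHeight - LiveOutHeight;   // height slack is smaller
  } else
    CyclicLatency = 0;
  return CyclicLatency;
}
// For the a->b(a,c)->c(b)->d(c)->exit example above: LiveOutDepth = 3,
// LiveOutHeight = 2, UseDepth = 1, UseHeight = 3, DefLatency = 1, giving
// min(3 - 1, 4 - 2) = 2 cycles, matching the doc comment.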
1377 
1378 /// Release ExitSU predecessors and set up scheduler queues. Re-position
1379 /// the Top RP tracker in case the region beginning has changed.
1380 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1381  ArrayRef<SUnit*> BotRoots) {
1382  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1383  if (ShouldTrackPressure) {
1384  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1385  TopRPTracker.setPos(CurrentTop);
1386  }
1387 }
1388 
1389 /// Move an instruction and update register pressure.
1390 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1391  // Move the instruction to its new location in the instruction stream.
1392  MachineInstr *MI = SU->getInstr();
1393 
1394  if (IsTopNode) {
1395  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1396  if (&*CurrentTop == MI)
1397  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1398  else {
1399  moveInstruction(MI, CurrentTop);
1400  TopRPTracker.setPos(MI);
1401  }
1402 
1403  if (ShouldTrackPressure) {
1404  // Update top scheduled pressure.
1405  RegisterOperands RegOpers;
1406  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1407  if (ShouldTrackLaneMasks) {
1408  // Adjust liveness and add missing dead+read-undef flags.
1409  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1410  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1411  } else {
1412  // Adjust for missing dead-def flags.
1413  RegOpers.detectDeadDefs(*MI, *LIS);
1414  }
1415 
1416  TopRPTracker.advance(RegOpers);
1417  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1418  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1419  TopRPTracker.getRegSetPressureAtPos(), TRI););
1420 
1421  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1422  }
1423  } else {
1424  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1425  MachineBasicBlock::iterator priorII =
1426  priorNonDebug(CurrentBottom, CurrentTop);
1427  if (&*priorII == MI)
1428  CurrentBottom = priorII;
1429  else {
1430  if (&*CurrentTop == MI) {
1431  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1432  TopRPTracker.setPos(CurrentTop);
1433  }
1434  moveInstruction(MI, CurrentBottom);
1435  CurrentBottom = MI;
1436  BotRPTracker.setPos(CurrentBottom);
1437  }
1438  if (ShouldTrackPressure) {
1439  RegisterOperands RegOpers;
1440  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1441  if (ShouldTrackLaneMasks) {
1442  // Adjust liveness and add missing dead+read-undef flags.
1443  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1444  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1445  } else {
1446  // Adjust for missing dead-def flags.
1447  RegOpers.detectDeadDefs(*MI, *LIS);
1448  }
1449 
1450  if (BotRPTracker.getPos() != CurrentBottom)
1451  BotRPTracker.recedeSkipDebugValues();
1452  SmallVector<RegisterMaskPair, 8> LiveUses;
1453  BotRPTracker.recede(RegOpers, &LiveUses);
1454  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1455  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1456  BotRPTracker.getRegSetPressureAtPos(), TRI););
1457 
1458  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1459  updatePressureDiffs(LiveUses);
1460  }
1461  }
1462 }
1463 
1464 //===----------------------------------------------------------------------===//
1465 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1466 //===----------------------------------------------------------------------===//
1467 
1468 namespace {
1469 
1470 /// Post-process the DAG to create cluster edges between neighboring
1471 /// loads or between neighboring stores.
1472 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1473  struct MemOpInfo {
1474  SUnit *SU;
1475  unsigned BaseReg;
1476  int64_t Offset;
1477 
1478  MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1479  : SU(su), BaseReg(reg), Offset(ofs) {}
1480 
1481  bool operator<(const MemOpInfo&RHS) const {
1482  return std::tie(BaseReg, Offset, SU->NodeNum) <
1483  std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1484  }
1485  };
1486 
1487  const TargetInstrInfo *TII;
1488  const TargetRegisterInfo *TRI;
1489  bool IsLoad;
1490 
1491 public:
1492  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1493  const TargetRegisterInfo *tri, bool IsLoad)
1494  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1495 
1496  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1497 
1498 protected:
1499  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1500 };
1501 
1502 class StoreClusterMutation : public BaseMemOpClusterMutation {
1503 public:
1504  StoreClusterMutation(const TargetInstrInfo *tii,
1505  const TargetRegisterInfo *tri)
1506  : BaseMemOpClusterMutation(tii, tri, false) {}
1507 };
1508 
1509 class LoadClusterMutation : public BaseMemOpClusterMutation {
1510 public:
1511  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1512  : BaseMemOpClusterMutation(tii, tri, true) {}
1513 };
1514 
1515 } // end anonymous namespace
1516 
1517 namespace llvm {
1518 
1519 std::unique_ptr<ScheduleDAGMutation>
1520 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1521  const TargetRegisterInfo *TRI) {
1522  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1523  : nullptr;
1524 }
1525 
1526 std::unique_ptr<ScheduleDAGMutation>
1527 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1528  const TargetRegisterInfo *TRI) {
1529  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1530  : nullptr;
1531 }
1532 
1533 } // end namespace llvm
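// How a target typically wires these mutations in (a sketch; "MyTargetPassConfig"
// is hypothetical, but the addMutation() pattern mirrors how in-tree targets
// use these factories from their createMachineScheduler hook):
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }
//
// The factories return nullptr when -misched-cluster is false, so the flag
// cleanly disables clustering.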
1534 
1535 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1536  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1537  SmallVector<MemOpInfo, 32> MemOpRecords;
1538  for (SUnit *SU : MemOps) {
1539  unsigned BaseReg;
1540  int64_t Offset;
1541  if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1542  MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1543  }
1544  if (MemOpRecords.size() < 2)
1545  return;
1546 
1547  llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
1548  unsigned ClusterLength = 1;
1549  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1550  SUnit *SUa = MemOpRecords[Idx].SU;
1551  SUnit *SUb = MemOpRecords[Idx+1].SU;
1552  if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1553  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1554  ClusterLength) &&
1555  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1556  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1557  << SUb->NodeNum << ")\n");
1558  // Copy successor edges from SUa to SUb. Interleaving computation
1559  // dependent on SUa can prevent load combining due to register reuse.
1560  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1561  // loads should have effectively the same inputs.
1562  for (const SDep &Succ : SUa->Succs) {
1563  if (Succ.getSUnit() == SUb)
1564  continue;
1565  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1566  << ")\n");
1567  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1568  }
1569  ++ClusterLength;
1570  } else
1571  ClusterLength = 1;
1572  }
1573 }
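// Worked example (hypothetical operands): after the llvm::sort above, records
// with the same base register are adjacent and ordered by offset, e.g.
//
//   (%base1, 0)  (%base1, 8)  (%base1, 16)  (%base2, 0)
//
// Each adjacent pair is then offered to shouldClusterMemOps() together with
// the current ClusterLength; an accepted pair gets a weak SDep::Cluster edge
// (plus artificial edges copying SUa's successors onto SUb), and the run is
// restarted (ClusterLength = 1) whenever the target declines or the edge
// cannot be added.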
1574 
1575 /// Callback from DAG postProcessing to create cluster edges for loads.
1576 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1577  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1578 
1579  // Map DAG NodeNum to store chain ID.
1580  DenseMap<unsigned, unsigned> StoreChainIDs;
1581  // Map each store chain to a set of dependent MemOps.
1582  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1583  for (SUnit &SU : DAG->SUnits) {
1584  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1585  (!IsLoad && !SU.getInstr()->mayStore()))
1586  continue;
1587 
1588  unsigned ChainPredID = DAG->SUnits.size();
1589  for (const SDep &Pred : SU.Preds) {
1590  if (Pred.isCtrl()) {
1591  ChainPredID = Pred.getSUnit()->NodeNum;
1592  break;
1593  }
1594  }
1595  // Check if this chain-like pred has been seen
1596  // before. ChainPredID==MaxNodeID at the top of the schedule.
1597  unsigned NumChains = StoreChainDependents.size();
1598  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1599  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1600  if (Result.second)
1601  StoreChainDependents.resize(NumChains + 1);
1602  StoreChainDependents[Result.first->second].push_back(&SU);
1603  }
1604 
1605  // Iterate over the store chains.
1606  for (auto &SCD : StoreChainDependents)
1607  clusterNeighboringMemOps(SCD, DAG);
1608 }
1609 
1610 //===----------------------------------------------------------------------===//
1611 // CopyConstrain - DAG post-processing to encourage copy elimination.
1612 //===----------------------------------------------------------------------===//
1613 
1614 namespace {
1615 
1616 /// Post-process the DAG to create weak edges from all uses of a copy to
1617 /// the one use that defines the copy's source vreg, most likely an induction
1618 /// variable increment.
1619 class CopyConstrain : public ScheduleDAGMutation {
1620  // Transient state.
1621  SlotIndex RegionBeginIdx;
1622 
1623  // RegionEndIdx is the slot index of the last non-debug instruction in the
1624  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1625  SlotIndex RegionEndIdx;
1626 
1627 public:
1628  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1629 
1630  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1631 
1632 protected:
1633  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1634 };
1635 
1636 } // end anonymous namespace
1637 
1638 namespace llvm {
1639 
1640 std::unique_ptr<ScheduleDAGMutation>
1641 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1642  const TargetRegisterInfo *TRI) {
1643  return llvm::make_unique<CopyConstrain>(TII, TRI);
1644 }
1645 
1646 } // end namespace llvm
1647 
1648 /// constrainLocalCopy handles two possibilities:
1649 /// 1) Local src:
1650 /// I0: = dst
1651 /// I1: src = ...
1652 /// I2: = dst
1653 /// I3: dst = src (copy)
1654 /// (create pred->succ edges I0->I1, I2->I1)
1655 ///
1656 /// 2) Local copy:
1657 /// I0: dst = src (copy)
1658 /// I1: = dst
1659 /// I2: src = ...
1660 /// I3: = dst
1661 /// (create pred->succ edges I1->I2, I3->I2)
1662 ///
1663 /// Although the MachineScheduler is currently constrained to single blocks,
1664 /// this algorithm should handle extended blocks. An EBB is a set of
1665 /// contiguously numbered blocks such that the previous block in the EBB is
1666 /// always the single predecessor.
1667 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1668  LiveIntervals *LIS = DAG->getLIS();
1669  MachineInstr *Copy = CopySU->getInstr();
1670 
1671  // Check for pure vreg copies.
1672  const MachineOperand &SrcOp = Copy->getOperand(1);
1673  unsigned SrcReg = SrcOp.getReg();
1674  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1675  return;
1676 
1677  const MachineOperand &DstOp = Copy->getOperand(0);
1678  unsigned DstReg = DstOp.getReg();
1679  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1680  return;
1681 
1682  // Check if either the dest or source is local. If it's live across a back
1683  // edge, it's not local. Note that if both vregs are live across the back
1684  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1685  // If both the copy's source and dest are local live intervals, then we
1686  // should treat the dest as the global for the purpose of adding
1687  // constraints. This adds edges from source's other uses to the copy.
1688  unsigned LocalReg = SrcReg;
1689  unsigned GlobalReg = DstReg;
1690  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1691  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1692  LocalReg = DstReg;
1693  GlobalReg = SrcReg;
1694  LocalLI = &LIS->getInterval(LocalReg);
1695  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1696  return;
1697  }
1698  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1699 
1700  // Find the global segment after the start of the local LI.
1701  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1702  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1703  // local live range. We could create edges from other global uses to the local
1704  // start, but the coalescer should have already eliminated these cases, so
1705  // don't bother dealing with it.
1706  if (GlobalSegment == GlobalLI->end())
1707  return;
1708 
1709  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1710  // returned the next global segment. But if GlobalSegment overlaps with
1711  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1712  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1713  if (GlobalSegment->contains(LocalLI->beginIndex()))
1714  ++GlobalSegment;
1715 
1716  if (GlobalSegment == GlobalLI->end())
1717  return;
1718 
1719  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1720  if (GlobalSegment != GlobalLI->begin()) {
1721  // Two address defs have no hole.
1722  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1723  GlobalSegment->start)) {
1724  return;
1725  }
1726  // If the prior global segment may be defined by the same two-address
1727  // instruction that also defines LocalLI, then we can't make a hole here.
1728  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1729  LocalLI->beginIndex())) {
1730  return;
1731  }
1732  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1733  // it would be a disconnected component in the live range.
1734  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1735  "Disconnected LRG within the scheduling region.");
1736  }
1737  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1738  if (!GlobalDef)
1739  return;
1740 
1741  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1742  if (!GlobalSU)
1743  return;
1744 
1745  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1746  // constraining the uses of the last local def to precede GlobalDef.
1747  SmallVector<SUnit*,8> LocalUses;
1748  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1749  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1750  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1751  for (const SDep &Succ : LastLocalSU->Succs) {
1752  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1753  continue;
1754  if (Succ.getSUnit() == GlobalSU)
1755  continue;
1756  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1757  return;
1758  LocalUses.push_back(Succ.getSUnit());
1759  }
1760  // Open the top of the GlobalLI hole by constraining any earlier global uses
1761  // to precede the start of LocalLI.
1762  SmallVector<SUnit*,8> GlobalUses;
1763  MachineInstr *FirstLocalDef =
1764  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1765  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1766  for (const SDep &Pred : GlobalSU->Preds) {
1767  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1768  continue;
1769  if (Pred.getSUnit() == FirstLocalSU)
1770  continue;
1771  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1772  return;
1773  GlobalUses.push_back(Pred.getSUnit());
1774  }
1775  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1776  // Add the weak edges.
1777  for (SmallVectorImpl<SUnit*>::const_iterator
1778  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1779  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1780  << GlobalSU->NodeNum << ")\n");
1781  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1782  }
1783  for (SmallVectorImpl<SUnit*>::const_iterator
1784  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1785  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1786  << FirstLocalSU->NodeNum << ")\n");
1787  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1788  }
1789 }
1790 
1791 /// Callback from DAG postProcessing to create weak edges to encourage
1792 /// copy elimination.
1793 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1794  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1795  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1796 
1797  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1798  if (FirstPos == DAG->end())
1799  return;
1800  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1801  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1802  *priorNonDebug(DAG->end(), DAG->begin()));
1803 
1804  for (SUnit &SU : DAG->SUnits) {
1805  if (!SU.getInstr()->isCopy())
1806  continue;
1807 
1808  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1809  }
1810 }
1811 
1812 //===----------------------------------------------------------------------===//
1813 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1814 // and possibly other custom schedulers.
1815 //===----------------------------------------------------------------------===//
1816 
1817 static const unsigned InvalidCycle = ~0U;
1818 
1819 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1820 
1821 /// Given a Count of resource usage and a Latency value, return true if a
1822 /// SchedBoundary becomes resource limited.
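/// For example, with LFactor == 2, a zone that has executed Count == 10 scaled
/// resource units against a scheduled Latency of 3 cycles evaluates
/// 10 - (3 * 2) = 4 > 2 and is therefore reported as resource limited.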
1823 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1824  unsigned Latency) {
1825  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1826 }
1827 
1828 void SchedBoundary::reset() {
1829  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1830  // Destroying and reconstructing it is very expensive though. So keep
1831  // invalid, placeholder HazardRecs.
1832  if (HazardRec && HazardRec->isEnabled()) {
1833  delete HazardRec;
1834  HazardRec = nullptr;
1835  }
1836  Available.clear();
1837  Pending.clear();
1838  CheckPending = false;
1839  CurrCycle = 0;
1840  CurrMOps = 0;
1841  MinReadyCycle = std::numeric_limits<unsigned>::max();
1842  ExpectedLatency = 0;
1843  DependentLatency = 0;
1844  RetiredMOps = 0;
1845  MaxExecutedResCount = 0;
1846  ZoneCritResIdx = 0;
1847  IsResourceLimited = false;
1848  ReservedCycles.clear();
1849 #ifndef NDEBUG
1850  // Track the maximum number of stall cycles that could arise either from the
1851  // latency of a DAG edge or the number of cycles that a processor resource is
1852  // reserved (SchedBoundary::ReservedCycles).
1853  MaxObservedStall = 0;
1854 #endif
1855  // Reserve a zero-count for invalid CritResIdx.
1856  ExecutedResCounts.resize(1);
1857  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1858 }
1859 
1860 void SchedRemainder::
1861 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1862  reset();
1863  if (!SchedModel->hasInstrSchedModel())
1864  return;
1865  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1866  for (SUnit &SU : DAG->SUnits) {
1867  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1868  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1869  * SchedModel->getMicroOpFactor();
1870  for (TargetSchedModel::ProcResIter
1871  PI = SchedModel->getWriteProcResBegin(SC),
1872  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1873  unsigned PIdx = PI->ProcResourceIdx;
1874  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1875  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1876  }
1877  }
1878 }
1879 
1880 void SchedBoundary::
1881 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1882  reset();
1883  DAG = dag;
1884  SchedModel = smodel;
1885  Rem = rem;
1886  if (SchedModel->hasInstrSchedModel()) {
1887  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1888  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1889  }
1890 }
1891 
1892 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1893 /// these "soft stalls" differently than the hard stall cycles based on CPU
1894 /// resources and computed by checkHazard(). A fully in-order model
1895 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1896 /// available for scheduling until they are ready. However, a weaker in-order
1897 /// model may use this for heuristics. For example, if a processor has in-order
1898 /// behavior when reading certain resources, this may come into play.
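/// The result is simply max(0, ReadyCycle - CurrCycle) for unbuffered SUs, and
/// 0 for everything else.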
1899 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1900  if (!SU->isUnbuffered)
1901  return 0;
1902 
1903  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1904  if (ReadyCycle > CurrCycle)
1905  return ReadyCycle - CurrCycle;
1906  return 0;
1907 }
1908 
1909 /// Compute the next cycle at which the given processor resource can be
1910 /// scheduled.
1911 unsigned SchedBoundary::
1912 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1913  unsigned NextUnreserved = ReservedCycles[PIdx];
1914  // If this resource has never been used, always return cycle zero.
1915  if (NextUnreserved == InvalidCycle)
1916  return 0;
1917  // For bottom-up scheduling add the cycles needed for the current operation.
1918  if (!isTop())
1919  NextUnreserved += Cycles;
1920  return NextUnreserved;
1921 }
1922 
1923 /// Does this SU have a hazard within the current instruction group.
1924 ///
1925 /// The scheduler supports two modes of hazard recognition. The first is the
1926 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1927 /// supports highly complicated in-order reservation tables
1928 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1929 ///
1930 /// The second is a streamlined mechanism that checks for hazards based on
1931 /// simple counters that the scheduler itself maintains. It explicitly checks
1932 /// for instruction dispatch limitations, including the number of micro-ops that
1933 /// can dispatch per cycle.
1934 ///
1935 /// TODO: Also check whether the SU must start a new group.
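/// For example, with an issue width of 4, if 3 micro-ops have already issued
/// in the current cycle, an SU that expands to 2 micro-ops is reported as a
/// hazard and is kept in the Pending queue until the cycle is bumped.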
1936 bool SchedBoundary::checkHazard(SUnit *SU) {
1937  if (HazardRec->isEnabled()
1938  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1939  return true;
1940  }
1941 
1942  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1943  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1944  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1945  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1946  return true;
1947  }
1948 
1949  if (CurrMOps > 0 &&
1950  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1951  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1952  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1953  << (isTop() ? "begin" : "end") << " group\n");
1954  return true;
1955  }
1956 
1957  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1958  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1959  for (const MCWriteProcResEntry &PE :
1960  make_range(SchedModel->getWriteProcResBegin(SC),
1961  SchedModel->getWriteProcResEnd(SC))) {
1962  unsigned ResIdx = PE.ProcResourceIdx;
1963  unsigned Cycles = PE.Cycles;
1964  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1965  if (NRCycle > CurrCycle) {
1966 #ifndef NDEBUG
1967  MaxObservedStall = std::max(Cycles, MaxObservedStall);
1968 #endif
1969  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1970  << SchedModel->getResourceName(ResIdx) << "="
1971  << NRCycle << "c\n");
1972  return true;
1973  }
1974  }
1975  }
1976  return false;
1977 }
1978 
1979 // Find the unscheduled node in ReadySUs with the highest latency.
1980 unsigned SchedBoundary::
1981 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1982  SUnit *LateSU = nullptr;
1983  unsigned RemLatency = 0;
1984  for (SUnit *SU : ReadySUs) {
1985  unsigned L = getUnscheduledLatency(SU);
1986  if (L > RemLatency) {
1987  RemLatency = L;
1988  LateSU = SU;
1989  }
1990  }
1991  if (LateSU) {
1992  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1993  << LateSU->NodeNum << ") " << RemLatency << "c\n");
1994  }
1995  return RemLatency;
1996 }
1997 
1998 // Count resources in this zone and the remaining unscheduled
1999 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2000 // resource index, or zero if the zone is issue limited.
2001 unsigned SchedBoundary::
2002 getOtherResourceCount(unsigned &OtherCritIdx) {
2003  OtherCritIdx = 0;
2004  if (!SchedModel->hasInstrSchedModel())
2005  return 0;
2006 
2007  unsigned OtherCritCount = Rem->RemIssueCount
2008  + (RetiredMOps * SchedModel->getMicroOpFactor());
2009  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2010  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2011  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2012  PIdx != PEnd; ++PIdx) {
2013  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2014  if (OtherCount > OtherCritCount) {
2015  OtherCritCount = OtherCount;
2016  OtherCritIdx = PIdx;
2017  }
2018  }
2019  if (OtherCritIdx) {
2020  LLVM_DEBUG(
2021  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2022  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2023  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2024  }
2025  return OtherCritCount;
2026 }
2027 
2028 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2029  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2030 
2031 #ifndef NDEBUG
2032  // ReadyCycle had been bumped up to CurrCycle when this node was
2033  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2034  // scheduling, so may now be greater than ReadyCycle.
2035  if (ReadyCycle > CurrCycle)
2036  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2037 #endif
2038 
2039  if (ReadyCycle < MinReadyCycle)
2040  MinReadyCycle = ReadyCycle;
2041 
2042  // Check for interlocks first. For the purpose of other heuristics, an
2043  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2044  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2045  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2046  Available.size() >= ReadyListLimit)
2047  Pending.push(SU);
2048  else
2049  Available.push(SU);
2050 }
2051 
2052 /// Move the boundary of scheduled code by one cycle.
2053 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2054  if (SchedModel->getMicroOpBufferSize() == 0) {
2055  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2056  "MinReadyCycle uninitialized");
2057  if (MinReadyCycle > NextCycle)
2058  NextCycle = MinReadyCycle;
2059  }
2060  // Update the current micro-ops, which will issue in the next cycle.
2061  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2062  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2063 
2064  // Decrement DependentLatency based on the next cycle.
2065  if ((NextCycle - CurrCycle) > DependentLatency)
2066  DependentLatency = 0;
2067  else
2068  DependentLatency -= (NextCycle - CurrCycle);
2069 
2070  if (!HazardRec->isEnabled()) {
2071  // Bypass HazardRec virtual calls.
2072  CurrCycle = NextCycle;
2073  } else {
2074  // Bypass getHazardType calls in case of long latency.
2075  for (; CurrCycle != NextCycle; ++CurrCycle) {
2076  if (isTop())
2077  HazardRec->AdvanceCycle();
2078  else
2079  HazardRec->RecedeCycle();
2080  }
2081  }
2082  CheckPending = true;
2083  IsResourceLimited =
2084  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2085  getScheduledLatency());
2086 
2087  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2088  << '\n');
2089 }
2090 
2091 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2092  ExecutedResCounts[PIdx] += Count;
2093  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2094  MaxExecutedResCount = ExecutedResCounts[PIdx];
2095 }
2096 
2097 /// Add the given processor resource to this scheduled zone.
2098 ///
2099 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2100 /// during which this resource is consumed.
2101 ///
2102 /// \return the next cycle at which the instruction may execute without
2103 /// oversubscribing resources.
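/// For example, a resource with a scaling factor of 3 that is occupied for 2
/// consecutive cycles charges 6 scaled units against this zone and against the
/// region's RemainingCounts.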
2104 unsigned SchedBoundary::
2105 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2106  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2107  unsigned Count = Factor * Cycles;
2108  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2109  << Cycles << "x" << Factor << "u\n");
2110 
2111  // Update Executed resources counts.
2112  incExecutedResources(PIdx, Count);
2113  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2114  Rem->RemainingCounts[PIdx] -= Count;
2115 
2116  // Check if this resource exceeds the current critical resource. If so, it
2117  // becomes the critical resource.
2118  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2119  ZoneCritResIdx = PIdx;
2120  LLVM_DEBUG(dbgs() << " *** Critical resource "
2121  << SchedModel->getResourceName(PIdx) << ": "
2122  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2123  << "c\n");
2124  }
2125  // For reserved resources, record the highest cycle using the resource.
2126  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2127  if (NextAvailable > CurrCycle) {
2128  LLVM_DEBUG(dbgs() << " Resource conflict: "
2129  << SchedModel->getProcResource(PIdx)->Name
2130  << " reserved until @" << NextAvailable << "\n");
2131  }
2132  return NextAvailable;
2133 }
2134 
2135 /// Move the boundary of scheduled code by one SUnit.
2136 void SchedBoundary::bumpNode(SUnit *SU) {
2137  // Update the reservation table.
2138  if (HazardRec->isEnabled()) {
2139  if (!isTop() && SU->isCall) {
2140  // Calls are scheduled with their preceding instructions. For bottom-up
2141  // scheduling, clear the pipeline state before emitting.
2142  HazardRec->Reset();
2143  }
2144  HazardRec->EmitInstruction(SU);
2145  }
2146  // checkHazard should prevent scheduling multiple instructions per cycle that
2147  // exceed the issue width.
2148  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2149  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2150  assert(
2151  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2152  "Cannot schedule this instruction's MicroOps in the current cycle.");
2153 
2154  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2155  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2156 
2157  unsigned NextCycle = CurrCycle;
2158  switch (SchedModel->getMicroOpBufferSize()) {
2159  case 0:
2160  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2161  break;
2162  case 1:
2163  if (ReadyCycle > NextCycle) {
2164  NextCycle = ReadyCycle;
2165  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2166  }
2167  break;
2168  default:
2169  // We don't currently model the OOO reorder buffer, so consider all
2170  // scheduled MOps to be "retired". We do loosely model in-order resource
2171  // latency. If this instruction uses an in-order resource, account for any
2172  // likely stall cycles.
2173  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2174  NextCycle = ReadyCycle;
2175  break;
2176  }
2177  RetiredMOps += IncMOps;
2178 
2179  // Update resource counts and critical resource.
2180  if (SchedModel->hasInstrSchedModel()) {
2181  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2182  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2183  Rem->RemIssueCount -= DecRemIssue;
2184  if (ZoneCritResIdx) {
2185  // Scale scheduled micro-ops for comparing with the critical resource.
2186  unsigned ScaledMOps =
2187  RetiredMOps * SchedModel->getMicroOpFactor();
2188 
2189  // If scaled micro-ops are now more than the previous critical resource by
2190  // a full cycle, then micro-ops issue becomes critical.
2191  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2192  >= (int)SchedModel->getLatencyFactor()) {
2193  ZoneCritResIdx = 0;
2194  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2195  << ScaledMOps / SchedModel->getLatencyFactor()
2196  << "c\n");
2197  }
2198  }
2199  for (TargetSchedModel::ProcResIter
2200  PI = SchedModel->getWriteProcResBegin(SC),
2201  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2202  unsigned RCycle =
2203  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2204  if (RCycle > NextCycle)
2205  NextCycle = RCycle;
2206  }
2207  if (SU->hasReservedResource) {
2208  // For reserved resources, record the highest cycle using the resource.
2209  // For top-down scheduling, this is the cycle in which we schedule this
2210  // instruction plus the number of cycles the operation reserves the
2211  // resource. For bottom-up, it is simply the instruction's cycle.
2212  for (TargetSchedModel::ProcResIter
2213  PI = SchedModel->getWriteProcResBegin(SC),
2214  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2215  unsigned PIdx = PI->ProcResourceIdx;
2216  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2217  if (isTop()) {
2218  ReservedCycles[PIdx] =
2219  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2220  }
2221  else
2222  ReservedCycles[PIdx] = NextCycle;
2223  }
2224  }
2225  }
2226  }
2227  // Update ExpectedLatency and DependentLatency.
2228  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2229  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2230  if (SU->getDepth() > TopLatency) {
2231  TopLatency = SU->getDepth();
2232  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2233  << SU->NodeNum << ") " << TopLatency << "c\n");
2234  }
2235  if (SU->getHeight() > BotLatency) {
2236  BotLatency = SU->getHeight();
2237  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2238  << SU->NodeNum << ") " << BotLatency << "c\n");
2239  }
2240  // If we stall for any reason, bump the cycle.
2241  if (NextCycle > CurrCycle)
2242  bumpCycle(NextCycle);
2243  else
2244  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2245  // resource limited. If a stall occurred, bumpCycle does this.
2246  IsResourceLimited =
2247  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2248  getScheduledLatency());
2249 
2250  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2251  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2252  // one cycle. Since we commonly reach the max MOps here, opportunistically
2253  // bump the cycle to avoid uselessly checking everything in the readyQ.
2254  CurrMOps += IncMOps;
2255 
2256  // Bump the cycle count for issue group constraints.
2257  // This must be done after NextCycle has been adjusted for all other stalls.
2258  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2259  // CurrCycle to X.
2260  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2261  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2262  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2263  << " group\n");
2264  bumpCycle(++NextCycle);
2265  }
2266 
2267  while (CurrMOps >= SchedModel->getIssueWidth()) {
2268  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2269  << CurrCycle << '\n');
2270  bumpCycle(++NextCycle);
2271  }
2272  LLVM_DEBUG(dumpScheduledState());
2273 }
2274 
2275 /// Release pending ready nodes into the available queue. This makes them
2276 /// visible to heuristics.
2277 void SchedBoundary::releasePending() {
2278  // If the available queue is empty, it is safe to reset MinReadyCycle.
2279  if (Available.empty())
2280  MinReadyCycle = std::numeric_limits<unsigned>::max();
2281 
2282  // Check to see if any of the pending instructions are ready to issue. If
2283  // so, add them to the available queue.
2284  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2285  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2286  SUnit *SU = *(Pending.begin()+i);
2287  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2288 
2289  if (ReadyCycle < MinReadyCycle)
2290  MinReadyCycle = ReadyCycle;
2291 
2292  if (!IsBuffered && ReadyCycle > CurrCycle)
2293  continue;
2294 
2295  if (checkHazard(SU))
2296  continue;
2297 
2298  if (Available.size() >= ReadyListLimit)
2299  break;
2300 
2301  Available.push(SU);
2302  Pending.remove(Pending.begin()+i);
2303  --i; --e;
2304  }
2305  CheckPending = false;
2306 }
2307 
2308 /// Remove SU from the ready set for this boundary.
2309 void SchedBoundary::removeReady(SUnit *SU) {
2310  if (Available.isInQueue(SU))
2311  Available.remove(Available.find(SU));
2312  else {
2313  assert(Pending.isInQueue(SU) && "bad ready count");
2314  Pending.remove(Pending.find(SU));
2315  }
2316 }
2317 
2318 /// If this queue only has one ready candidate, return it. As a side effect,
2319 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2320 /// one node is ready. If multiple instructions are ready, return NULL.
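/// Callers such as pickNode() and pickNodeBidirectional() use this as a fast
/// path: when it returns a node, the full heuristic comparison in
/// tryCandidate() is skipped for that pick.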
2321 SUnit *SchedBoundary::pickOnlyChoice() {
2322  if (CheckPending)
2323  releasePending();
2324 
2325  if (CurrMOps > 0) {
2326  // Defer any ready instrs that now have a hazard.
2327  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2328  if (checkHazard(*I)) {
2329  Pending.push(*I);
2330  I = Available.remove(I);
2331  continue;
2332  }
2333  ++I;
2334  }
2335  }
2336  for (unsigned i = 0; Available.empty(); ++i) {
2337 // FIXME: Re-enable assert once PR20057 is resolved.
2338 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2339 // "permanent hazard");
2340  (void)i;
2341  bumpCycle(CurrCycle + 1);
2342  releasePending();
2343  }
2344 
2345  LLVM_DEBUG(Pending.dump());
2346  LLVM_DEBUG(Available.dump());
2347 
2348  if (Available.size() == 1)
2349  return *Available.begin();
2350  return nullptr;
2351 }
2352 
2353 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2354 // This is useful information to dump after bumpNode.
2355 // Note that the Queue contents are more useful before pickNodeFromQueue.
2356 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2357  unsigned ResFactor;
2358  unsigned ResCount;
2359  if (ZoneCritResIdx) {
2360  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2361  ResCount = getResourceCount(ZoneCritResIdx);
2362  } else {
2363  ResFactor = SchedModel->getMicroOpFactor();
2364  ResCount = RetiredMOps * ResFactor;
2365  }
2366  unsigned LFactor = SchedModel->getLatencyFactor();
2367  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2368  << " Retired: " << RetiredMOps;
2369  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2370  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2371  << ResCount / ResFactor << " "
2372  << SchedModel->getResourceName(ZoneCritResIdx)
2373  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2374  << (IsResourceLimited ? " - Resource" : " - Latency")
2375  << " limited.\n";
2376 }
2377 #endif
2378 
2379 //===----------------------------------------------------------------------===//
2380 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2381 //===----------------------------------------------------------------------===//
2382 
2383 void GenericSchedulerBase::SchedCandidate::
2384 initResourceDelta(const ScheduleDAGMI *DAG,
2385  const TargetSchedModel *SchedModel) {
2386  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2387  return;
2388 
2389  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2390  for (TargetSchedModel::ProcResIter
2391  PI = SchedModel->getWriteProcResBegin(SC),
2392  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2393  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2394  ResDelta.CritResources += PI->Cycles;
2395  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2396  ResDelta.DemandedResources += PI->Cycles;
2397  }
2398 }
2399 
2400 /// Set the CandPolicy for a scheduling zone given the current resources and
2401 /// latencies inside and outside the zone.
2402 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2403  SchedBoundary &CurrZone,
2404  SchedBoundary *OtherZone) {
2405  // Apply preemptive heuristics based on the total latency and resources
2406  // inside and outside this zone. Potential stalls should be considered before
2407  // following this policy.
2408 
2409  // Compute remaining latency. We need this both to determine whether the
2410  // overall schedule has become latency-limited and whether the instructions
2411  // outside this zone are resource or latency limited.
2412  //
2413  // The "dependent" latency is updated incrementally during scheduling as the
2414  // max height/depth of scheduled nodes minus the cycles since it was
2415  // scheduled:
2416  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2417  //
2418  // The "independent" latency is the max ready queue depth:
2419  // ILat = max N.depth for N in Available|Pending
2420  //
2421  // RemainingLatency is the greater of independent and dependent latency.
2422  unsigned RemLatency = CurrZone.getDependentLatency();
2423  RemLatency = std::max(RemLatency,
2424  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2425  RemLatency = std::max(RemLatency,
2426  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2427 
2428  // Compute the critical resource outside the zone.
2429  unsigned OtherCritIdx = 0;
2430  unsigned OtherCount =
2431  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2432 
2433  bool OtherResLimited = false;
2434  if (SchedModel->hasInstrSchedModel())
2435  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2436  OtherCount, RemLatency);
2437 
2438  // Schedule aggressively for latency in PostRA mode. We don't check for
2439  // acyclic latency during PostRA, and highly out-of-order processors will
2440  // skip PostRA scheduling.
2441  if (!OtherResLimited) {
2442  if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2443  Policy.ReduceLatency |= true;
2444  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2445  << " RemainingLatency " << RemLatency << " + "
2446  << CurrZone.getCurrCycle() << "c > CritPath "
2447  << Rem.CriticalPath << "\n");
2448  }
2449  }
2450  // If the same resource is limiting inside and outside the zone, do nothing.
2451  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2452  return;
2453 
2454  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2455  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2456  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2457  } if (OtherResLimited) dbgs()
2458  << " RemainingLimit: "
2459  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2460  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2461  << " Latency limited both directions.\n");
2462 
2463  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2464  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2465 
2466  if (OtherResLimited)
2467  Policy.DemandResIdx = OtherCritIdx;
2468 }
2469 
2470 #ifndef NDEBUG
2471 const char *GenericSchedulerBase::getReasonStr(
2472  GenericSchedulerBase::CandReason Reason) {
2473  switch (Reason) {
2474  case NoCand: return "NOCAND ";
2475  case Only1: return "ONLY1 ";
2476  case PhysRegCopy: return "PREG-COPY ";
2477  case RegExcess: return "REG-EXCESS";
2478  case RegCritical: return "REG-CRIT ";
2479  case Stall: return "STALL ";
2480  case Cluster: return "CLUSTER ";
2481  case Weak: return "WEAK ";
2482  case RegMax: return "REG-MAX ";
2483  case ResourceReduce: return "RES-REDUCE";
2484  case ResourceDemand: return "RES-DEMAND";
2485  case TopDepthReduce: return "TOP-DEPTH ";
2486  case TopPathReduce: return "TOP-PATH ";
2487  case BotHeightReduce:return "BOT-HEIGHT";
2488  case BotPathReduce: return "BOT-PATH ";
2489  case NextDefUse: return "DEF-USE ";
2490  case NodeOrder: return "ORDER ";
2491  };
2492  llvm_unreachable("Unknown reason!");
2493 }
2494 
2495 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2496  PressureChange P;
2497  unsigned ResIdx = 0;
2498  unsigned Latency = 0;
2499  switch (Cand.Reason) {
2500  default:
2501  break;
2502  case RegExcess:
2503  P = Cand.RPDelta.Excess;
2504  break;
2505  case RegCritical:
2506  P = Cand.RPDelta.CriticalMax;
2507  break;
2508  case RegMax:
2509  P = Cand.RPDelta.CurrentMax;
2510  break;
2511  case ResourceReduce:
2512  ResIdx = Cand.Policy.ReduceResIdx;
2513  break;
2514  case ResourceDemand:
2515  ResIdx = Cand.Policy.DemandResIdx;
2516  break;
2517  case TopDepthReduce:
2518  Latency = Cand.SU->getDepth();
2519  break;
2520  case TopPathReduce:
2521  Latency = Cand.SU->getHeight();
2522  break;
2523  case BotHeightReduce:
2524  Latency = Cand.SU->getHeight();
2525  break;
2526  case BotPathReduce:
2527  Latency = Cand.SU->getDepth();
2528  break;
2529  }
2530  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2531  if (P.isValid())
2532  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2533  << ":" << P.getUnitInc() << " ";
2534  else
2535  dbgs() << " ";
2536  if (ResIdx)
2537  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2538  else
2539  dbgs() << " ";
2540  if (Latency)
2541  dbgs() << " " << Latency << " cycles ";
2542  else
2543  dbgs() << " ";
2544  dbgs() << '\n';
2545 }
2546 #endif
2547 
2548 namespace llvm {
2549 /// Return true if this heuristic determines order.
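/// A false return means the two candidates tie on this criterion, and the
/// caller should fall through to the next heuristic.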
2550 bool tryLess(int TryVal, int CandVal,
2551  GenericSchedulerBase::SchedCandidate &TryCand,
2552  GenericSchedulerBase::SchedCandidate &Cand,
2553  GenericSchedulerBase::CandReason Reason) {
2554  if (TryVal < CandVal) {
2555  TryCand.Reason = Reason;
2556  return true;
2557  }
2558  if (TryVal > CandVal) {
2559  if (Cand.Reason > Reason)
2560  Cand.Reason = Reason;
2561  return true;
2562  }
2563  return false;
2564 }
2565 
2566 bool tryGreater(int TryVal, int CandVal,
2567  GenericSchedulerBase::SchedCandidate &TryCand,
2568  GenericSchedulerBase::SchedCandidate &Cand,
2569  GenericSchedulerBase::CandReason Reason) {
2570  if (TryVal > CandVal) {
2571  TryCand.Reason = Reason;
2572  return true;
2573  }
2574  if (TryVal < CandVal) {
2575  if (Cand.Reason > Reason)
2576  Cand.Reason = Reason;
2577  return true;
2578  }
2579  return false;
2580 }
2581 
2582 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2583  GenericSchedulerBase::SchedCandidate &Cand,
2584  SchedBoundary &Zone) {
2585  if (Zone.isTop()) {
2586  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2587  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2588  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2589  return true;
2590  }
2591  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2592  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2593  return true;
2594  } else {
2595  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2596  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2597  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2598  return true;
2599  }
2600  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2601  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2602  return true;
2603  }
2604  return false;
2605 }
2606 } // end namespace llvm
2607 
2608 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2609  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2610  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2611 }
2612 
2613 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2614  tracePick(Cand.Reason, Cand.AtTop);
2615 }
2616 
2617 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2618  assert(dag->hasVRegLiveness() &&
2619  "(PreRA)GenericScheduler needs vreg liveness");
2620  DAG = static_cast<ScheduleDAGMILive*>(dag);
2621  SchedModel = DAG->getSchedModel();
2622  TRI = DAG->TRI;
2623 
2624  Rem.init(DAG, SchedModel);
2625  Top.init(DAG, SchedModel, &Rem);
2626  Bot.init(DAG, SchedModel, &Rem);
2627 
2628  // Initialize resource counts.
2629 
2630  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2631  // are disabled, then these HazardRecs will be disabled.
2632  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2633  if (!Top.HazardRec) {
2634  Top.HazardRec =
2635  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2636  Itin, DAG);
2637  }
2638  if (!Bot.HazardRec) {
2639  Bot.HazardRec =
2640  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2641  Itin, DAG);
2642  }
2643  TopCand.SU = nullptr;
2644  BotCand.SU = nullptr;
2645 }
2646 
2647 /// Initialize the per-region scheduling policy.
2648 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2649  MachineBasicBlock::iterator End,
2650  unsigned NumRegionInstrs) {
2651  const MachineFunction &MF = *Begin->getMF();
2652  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2653 
2654  // Avoid setting up the register pressure tracker for small regions to save
2655  // compile time. As a rough heuristic, only track pressure when the number of
2656  // schedulable instructions exceeds half the integer register file.
2657  RegionPolicy.ShouldTrackPressure = true;
2658  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2659  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2660  if (TLI->isTypeLegal(LegalIntVT)) {
2661  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2662  TLI->getRegClassFor(LegalIntVT));
2663  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2664  }
2665  }
2666 
2667  // For generic targets, we default to bottom-up, because it's simpler and more
2668  // compile-time optimizations have been implemented in that direction.
2669  RegionPolicy.OnlyBottomUp = true;
2670 
2671  // Allow the subtarget to override default policy.
2672  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2673 
2674  // After subtarget overrides, apply command line options.
2675  if (!EnableRegPressure)
2676  RegionPolicy.ShouldTrackPressure = false;
2677 
2678  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2679  // e.g. -misched-bottomup=false allows scheduling in both directions.
2680  assert((!ForceTopDown || !ForceBottomUp) &&
2681  "-misched-topdown incompatible with -misched-bottomup");
2682  if (ForceBottomUp.getNumOccurrences() > 0) {
2683  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2684  if (RegionPolicy.OnlyBottomUp)
2685  RegionPolicy.OnlyTopDown = false;
2686  }
2687  if (ForceTopDown.getNumOccurrences() > 0) {
2688  RegionPolicy.OnlyTopDown = ForceTopDown;
2689  if (RegionPolicy.OnlyTopDown)
2690  RegionPolicy.OnlyBottomUp = false;
2691  }
2692 }
2693 
2694 void GenericScheduler::dumpPolicy() const {
2695  // Cannot completely remove virtual function even in release mode.
2696 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2697  dbgs() << "GenericScheduler RegionPolicy: "
2698  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2699  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2700  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2701  << "\n";
2702 #endif
2703 }
2704 
2705 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2706 /// critical path by more cycles than it takes to drain the instruction buffer.
2707 /// We estimate an upper bound on in-flight instructions as:
2708 ///
2709 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2710 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2711 /// InFlightResources = InFlightIterations * LoopResources
2712 ///
2713 /// TODO: Check execution resources in addition to IssueCount.
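/// Roughly: when the cyclic (loop-carried) path is much shorter than the
/// acyclic path, several iterations overlap in flight; once the micro-ops
/// those iterations occupy exceed the micro-op buffer, the loop is treated as
/// acyclic-latency limited and latency heuristics take priority.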
2714 void GenericScheduler::checkAcyclicLatency() {
2715  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2716  return;
2717 
2718  // Scaled number of cycles per loop iteration.
2719  unsigned IterCount =
2720  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2721  Rem.RemIssueCount);
2722  // Scaled acyclic critical path.
2723  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2724  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2725  unsigned InFlightCount =
2726  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2727  unsigned BufferLimit =
2728  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2729 
2730  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2731 
2732  LLVM_DEBUG(
2733  dbgs() << "IssueCycles="
2734  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2735  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2736  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2737  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2738  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2739  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2740 }
2741 
2742 void GenericScheduler::registerRoots() {
2743  Rem.CriticalPath = DAG->ExitSU.getDepth();
2744 
2745  // Some roots may not feed into ExitSU. Check all of them in case.
2746  for (const SUnit *SU : Bot.Available) {
2747  if (SU->getDepth() > Rem.CriticalPath)
2748  Rem.CriticalPath = SU->getDepth();
2749  }
2750  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2751  if (DumpCriticalPathLength) {
2752  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2753  }
2754 
2755  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2756  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2757  checkAcyclicLatency();
2758  }
2759 }
2760 
2761 namespace llvm {
2762 bool tryPressure(const PressureChange &TryP,
2763  const PressureChange &CandP,
2764  GenericSchedulerBase::SchedCandidate &TryCand,
2765  GenericSchedulerBase::SchedCandidate &Cand,
2766  GenericSchedulerBase::CandReason Reason,
2767  const TargetRegisterInfo *TRI,
2768  const MachineFunction &MF) {
2769  // If one candidate decreases and the other increases, go with it.
2770  // Invalid candidates have UnitInc==0.
2771  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2772  Reason)) {
2773  return true;
2774  }
2775  // Do not compare the magnitude of pressure changes between top and bottom
2776  // boundary.
2777  if (Cand.AtTop != TryCand.AtTop)
2778  return false;
2779 
2780  // If both candidates affect the same set in the same boundary, go with the
2781  // smallest increase.
2782  unsigned TryPSet = TryP.getPSetOrMax();
2783  unsigned CandPSet = CandP.getPSetOrMax();
2784  if (TryPSet == CandPSet) {
2785  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2786  Reason);
2787  }
2788 
2789  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2790  std::numeric_limits<int>::max();
2791 
2792  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2793  std::numeric_limits<int>::max();
2794 
2795  // If the candidates are decreasing pressure, reverse priority.
2796  if (TryP.getUnitInc() < 0)
2797  std::swap(TryRank, CandRank);
2798  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2799 }
2800 
2801 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2802  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2803 }
2804 
2805 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2806 /// their physreg def/use.
2807 ///
2808 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2809 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2810 /// with the operation that produces or consumes the physreg. We'll do this when
2811 /// regalloc has support for parallel copies.
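/// For example, when scheduling top-down, a COPY that reads a physreg (such as
/// an incoming argument) returns 1 so it issues right after the physreg def; a
/// COPY that defines a physreg returns -1 when it has no remaining successors
/// in the zone (deferring it to the boundary) and 1 otherwise.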
2812 int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2813  const MachineInstr *MI = SU->getInstr();
2814  if (!MI->isCopy())
2815  return 0;
2816 
2817  unsigned ScheduledOper = isTop ? 1 : 0;
2818  unsigned UnscheduledOper = isTop ? 0 : 1;
2819  // If we have already scheduled the physreg produce/consumer, immediately
2820  // schedule the copy.
2821  if (TargetRegisterInfo::isPhysicalRegister(
2822  MI->getOperand(ScheduledOper).getReg()))
2823  return 1;
2824  // If the physreg is at the boundary, defer it. Otherwise schedule it
2825  // immediately to free the dependent. We can hoist the copy later.
2826  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2827  if (TargetRegisterInfo::isPhysicalRegister(
2828  MI->getOperand(UnscheduledOper).getReg()))
2829  return AtBoundary ? -1 : 1;
2830  return 0;
2831 }
2832 } // end namespace llvm
2833 
2834 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2835  bool AtTop,
2836  const RegPressureTracker &RPTracker,
2837  RegPressureTracker &TempTracker) {
2838  Cand.SU = SU;
2839  Cand.AtTop = AtTop;
2840  if (DAG->isTrackingPressure()) {
2841  if (AtTop) {
2842  TempTracker.getMaxDownwardPressureDelta(
2843  Cand.SU->getInstr(),
2844  Cand.RPDelta,
2845  DAG->getRegionCriticalPSets(),
2846  DAG->getRegPressure().MaxSetPressure);
2847  } else {
2848  if (VerifyScheduling) {
2849  TempTracker.getMaxUpwardPressureDelta(
2850  Cand.SU->getInstr(),
2851  &DAG->getPressureDiff(Cand.SU),
2852  Cand.RPDelta,
2853  DAG->getRegionCriticalPSets(),
2854  DAG->getRegPressure().MaxSetPressure);
2855  } else {
2856  RPTracker.getUpwardPressureDelta(
2857  Cand.SU->getInstr(),
2858  DAG->getPressureDiff(Cand.SU),
2859  Cand.RPDelta,
2860  DAG->getRegionCriticalPSets(),
2861  DAG->getRegPressure().MaxSetPressure);
2862  }
2863  }
2864  }
2865  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2866  << " Try SU(" << Cand.SU->NodeNum << ") "
2867  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2868  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2869 }
2870 
2871 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2872 /// hierarchical. This may be more efficient than a graduated cost model because
2873 /// we don't need to evaluate all aspects of the model for each node in the
2874 /// queue. But it's really done to make the heuristics easier to debug and
2875 /// statistically analyze.
2876 ///
2877 /// \param Cand provides the policy and current best candidate.
2878 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2879 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2880 // if Cand is from a different zone than TryCand.
2881 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2882  SchedCandidate &TryCand,
2883  SchedBoundary *Zone) const {
2884  // Initialize the candidate if needed.
2885  if (!Cand.isValid()) {
2886  TryCand.Reason = NodeOrder;
2887  return;
2888  }
2889 
2890  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2891  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2892  TryCand, Cand, PhysRegCopy))
2893  return;
2894 
2895  // Avoid exceeding the target's limit.
2896  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2897  Cand.RPDelta.Excess,
2898  TryCand, Cand, RegExcess, TRI,
2899  DAG->MF))
2900  return;
2901 
2902  // Avoid increasing the max critical pressure in the scheduled region.
2903  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2904  Cand.RPDelta.CriticalMax,
2905  TryCand, Cand, RegCritical, TRI,
2906  DAG->MF))
2907  return;
2908 
2909  // We only compare a subset of features when comparing nodes between
2910  // Top and Bottom boundary. Some properties are simply incomparable; in many
2911  // other instances we should only override the other boundary if something
2912  // is a clear good pick on one boundary. Skip heuristics that are more
2913  // "tie-breaking" in nature.
2914  bool SameBoundary = Zone != nullptr;
2915  if (SameBoundary) {
2916  // For loops that are acyclic path limited, aggressively schedule for
2917  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2918  // heuristics to take precedence.
2919  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2920  tryLatency(TryCand, Cand, *Zone))
2921  return;
2922 
2923  // Prioritize instructions that read unbuffered resources by stall cycles.
2924  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2925  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2926  return;
2927  }
2928 
2929  // Keep clustered nodes together to encourage downstream peephole
2930  // optimizations which may reduce resource requirements.
2931  //
2932  // This is a best effort to set things up for a post-RA pass. Optimizations
2933  // like generating loads of multiple registers should ideally be done within
2934  // the scheduler pass by combining the loads during DAG postprocessing.
2935  const SUnit *CandNextClusterSU =
2936  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2937  const SUnit *TryCandNextClusterSU =
2938  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2939  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2940  Cand.SU == CandNextClusterSU,
2941  TryCand, Cand, Cluster))
2942  return;
2943 
2944  if (SameBoundary) {
2945  // Weak edges are for clustering and other constraints.
2946  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2947  getWeakLeft(Cand.SU, Cand.AtTop),
2948  TryCand, Cand, Weak))
2949  return;
2950  }
2951 
2952  // Avoid increasing the max pressure of the entire region.
2953  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2954  Cand.RPDelta.CurrentMax,
2955  TryCand, Cand, RegMax, TRI,
2956  DAG->MF))
2957  return;
2958 
2959  if (SameBoundary) {
2960  // Avoid critical resource consumption and balance the schedule.
2961  TryCand.initResourceDelta(DAG, SchedModel);
2962  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2963  TryCand, Cand, ResourceReduce))
2964  return;
2965  if (tryGreater(TryCand.ResDelta.DemandedResources,
2966  Cand.ResDelta.DemandedResources,
2967  TryCand, Cand, ResourceDemand))
2968  return;
2969 
2970  // Avoid serializing long latency dependence chains.
2971  // For acyclic path limited loops, latency was already checked above.
2972  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2973  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2974  return;
2975 
2976  // Fall through to original instruction order.
2977  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2978  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2979  TryCand.Reason = NodeOrder;
2980  }
2981  }
2982 }
2983 
2984 /// Pick the best candidate from the queue.
2985 ///
2986 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2987 /// DAG building. To adjust for the current scheduling location we need to
2988 /// maintain the number of vreg uses remaining to be top-scheduled.
2989 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2990  const CandPolicy &ZonePolicy,
2991  const RegPressureTracker &RPTracker,
2992  SchedCandidate &Cand) {
2993  // getMaxPressureDelta temporarily modifies the tracker.
2994  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2995 
2996  ReadyQueue &Q = Zone.Available;
2997  for (SUnit *SU : Q) {
2998 
2999  SchedCandidate TryCand(ZonePolicy);
3000  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3001  // Pass SchedBoundary only when comparing nodes from the same boundary.
3002  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3003  tryCandidate(Cand, TryCand, ZoneArg);
3004  if (TryCand.Reason != NoCand) {
3005  // Initialize resource delta if needed in case future heuristics query it.
3006  if (TryCand.ResDelta == SchedResourceDelta())
3007  TryCand.initResourceDelta(DAG, SchedModel);
3008  Cand.setBest(TryCand);
3009  LLVM_DEBUG(traceCandidate(Cand));
3010  }
3011  }
3012 }
3013 
3014 /// Pick the best candidate node from either the top or bottom queue.
3015 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3016  // Schedule as far as possible in the direction of no choice. This is most
3017  // efficient, but also provides the best heuristics for CriticalPSets.
3018  if (SUnit *SU = Bot.pickOnlyChoice()) {
3019  IsTopNode = false;
3020  tracePick(Only1, false);
3021  return SU;
3022  }
3023  if (SUnit *SU = Top.pickOnlyChoice()) {
3024  IsTopNode = true;
3025  tracePick(Only1, true);
3026  return SU;
3027  }
3028  // Set the bottom-up policy based on the state of the current bottom zone and
3029  // the instructions outside the zone, including the top zone.
3030  CandPolicy BotPolicy;
3031  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3032  // Set the top-down policy based on the state of the current top zone and
3033  // the instructions outside the zone, including the bottom zone.
3034  CandPolicy TopPolicy;
3035  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3036 
3037  // See if BotCand is still valid (because we previously scheduled from Top).
3038  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3039  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3040  BotCand.Policy != BotPolicy) {
3041  BotCand.reset(CandPolicy());
3042  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3043  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3044  } else {
3045  LLVM_DEBUG(traceCandidate(BotCand));
3046 #ifndef NDEBUG
3047  if (VerifyScheduling) {
3048  SchedCandidate TCand;
3049  TCand.reset(CandPolicy());
3050  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3051  assert(TCand.SU == BotCand.SU &&
3052  "Last pick result should correspond to re-picking right now");
3053  }
3054 #endif
3055  }
3056 
3057  // Check if the top Q has a better candidate.
3058  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3059  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3060  TopCand.Policy != TopPolicy) {
3061  TopCand.reset(CandPolicy());
3062  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3063  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3064  } else {
3065  LLVM_DEBUG(traceCandidate(TopCand));
3066 #ifndef NDEBUG
3067  if (VerifyScheduling) {
3068  SchedCandidate TCand;
3069  TCand.reset(CandPolicy());
3070  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3071  assert(TCand.SU == TopCand.SU &&
3072  "Last pick result should correspond to re-picking right now");
3073  }
3074 #endif
3075  }
3076 
3077  // Pick best from BotCand and TopCand.
3078  assert(BotCand.isValid());
3079  assert(TopCand.isValid());
3080  SchedCandidate Cand = BotCand;
3081  TopCand.Reason = NoCand;
3082  tryCandidate(Cand, TopCand, nullptr);
3083  if (TopCand.Reason != NoCand) {
3084  Cand.setBest(TopCand);
3085  LLVM_DEBUG(traceCandidate(Cand));
3086  }
3087 
3088  IsTopNode = Cand.AtTop;
3089  tracePick(Cand);
3090  return Cand.SU;
3091 }
3092 
3093 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3094 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3095  if (DAG->top() == DAG->bottom()) {
3096  assert(Top.Available.empty() && Top.Pending.empty() &&
3097  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3098  return nullptr;
3099  }
3100  SUnit *SU;
3101  do {
3102  if (RegionPolicy.OnlyTopDown) {
3103  SU = Top.pickOnlyChoice();
3104  if (!SU) {
3105  CandPolicy NoPolicy;
3106  TopCand.reset(NoPolicy);
3107  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3108  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3109  tracePick(TopCand);
3110  SU = TopCand.SU;
3111  }
3112  IsTopNode = true;
3113  } else if (RegionPolicy.OnlyBottomUp) {
3114  SU = Bot.pickOnlyChoice();
3115  if (!SU) {
3116  CandPolicy NoPolicy;
3117  BotCand.reset(NoPolicy);
3118  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3119  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3120  tracePick(BotCand);
3121  SU = BotCand.SU;
3122  }
3123  IsTopNode = false;
3124  } else {
3125  SU = pickNodeBidirectional(IsTopNode);
3126  }
3127  } while (SU->isScheduled);
3128 
3129  if (SU->isTopReady())
3130  Top.removeReady(SU);
3131  if (SU->isBottomReady())
3132  Bot.removeReady(SU);
3133 
3134  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3135  << *SU->getInstr());
3136  return SU;
3137 }
3138 
3139 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3140  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3141  if (!isTop)
3142  ++InsertPos;
3143  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3144 
3145  // Find already scheduled copies with a single physreg dependence and move
3146  // them just above the scheduled instruction.
3147  for (SDep &Dep : Deps) {
3148  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3149  continue;
3150  SUnit *DepSU = Dep.getSUnit();
3151  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3152  continue;
3153  MachineInstr *Copy = DepSU->getInstr();
3154  if (!Copy->isCopy())
3155  continue;
3156  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3157  Dep.getSUnit()->dump(DAG));
3158  DAG->moveInstruction(Copy, InsertPos);
3159  }
3160 }
3161 
3162 /// Update the scheduler's state after scheduling a node. This is the same node
3163 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3164 /// update its state based on the current cycle before MachineSchedStrategy
3165 /// does.
3166 ///
3167 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3168 /// them here. See comments in biasPhysRegCopy.
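/// The picked node's ready cycle is first raised to the zone's current cycle,
/// and bumpNode() then updates the hazard recognizer, resource counts, and
/// expected latencies for that zone.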
3169 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3170  if (IsTopNode) {
3171  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3172  Top.bumpNode(SU);
3173  if (SU->hasPhysRegUses)
3174  reschedulePhysRegCopies(SU, true);
3175  } else {
3176  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3177  Bot.bumpNode(SU);
3178  if (SU->hasPhysRegDefs)
3179  reschedulePhysRegCopies(SU, false);
3180  }
3181 }
3182 
3183 /// Create the standard converging machine scheduler. This will be used as the
3184 /// default scheduler if the target does not set a default.
3185 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3186  ScheduleDAGMILive *DAG =
3187  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3188  // Register DAG post-processors.
3189  //
3190  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3191  // data and pass it to later mutations. Have a single mutation that gathers
3192  // the interesting nodes in one pass.
3193  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3194  return DAG;
3195 }
3196 
3197 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3198  return createGenericSchedLive(C);
3199 }
3200 
3201 static MachineSchedRegistry
3202 GenericSchedRegistry("converge", "Standard converging scheduler.",
3203  createConveringSched);
3204 
3205 //===----------------------------------------------------------------------===//
3206 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3207 //===----------------------------------------------------------------------===//
3208 
3209 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3210  DAG = Dag;
3211  SchedModel = DAG->getSchedModel();
3212  TRI = DAG->TRI;
3213 
3214  Rem.init(DAG, SchedModel);
3215  Top.init(DAG, SchedModel, &Rem);
3216  BotRoots.clear();
3217 
3218  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3219  // or are disabled, then these HazardRecs will be disabled.
3220  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3221  if (!Top.HazardRec) {
3222  Top.HazardRec =
3223  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3224  Itin, DAG);
3225  }
3226 }
3227 
3228 void PostGenericScheduler::registerRoots() {
3229  Rem.CriticalPath = DAG->ExitSU.getDepth();
3230 
3231  // Some roots may not feed into ExitSU. Check all of them in case.
3232  for (const SUnit *SU : BotRoots) {
3233  if (SU->getDepth() > Rem.CriticalPath)
3234  Rem.CriticalPath = SU->getDepth();
3235  }
3236  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3237  if (DumpCriticalPathLength) {
3238  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3239  }
3240 }
3241 
3242 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3243 ///
3244 /// \param Cand provides the policy and current best candidate.
3245 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3246 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3247  SchedCandidate &TryCand) {
3248  // Initialize the candidate if needed.
3249  if (!Cand.isValid()) {
3250  TryCand.Reason = NodeOrder;
3251  return;
3252  }
3253 
3254  // Prioritize instructions that read unbuffered resources by stall cycles.
3255  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3256  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3257  return;
3258 
3259  // Keep clustered nodes together.
3260  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3261  Cand.SU == DAG->getNextClusterSucc(),
3262  TryCand, Cand, Cluster))
3263  return;
3264 
3265  // Avoid critical resource consumption and balance the schedule.
3266  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3267  TryCand, Cand, ResourceReduce))
3268  return;
3269  if (tryGreater(TryCand.ResDelta.DemandedResources,
3270  Cand.ResDelta.DemandedResources,
3271  TryCand, Cand, ResourceDemand))
3272  return;
3273 
3274  // Avoid serializing long latency dependence chains.
3275  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3276  return;
3277  }
3278 
3279  // Fall through to original instruction order.
3280  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3281  TryCand.Reason = NodeOrder;
3282 }
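As with the pre-RA strategy, these heuristics form a fixed-priority cascade: each try* helper either records a decisive reason (and the function returns) or leaves TryCand.Reason untouched so the next heuristic can break the tie. A minimal standalone model of that cascade, with hypothetical types and only three of the checks:

#include <iostream>

// Hypothetical stand-ins; the real tryLess/tryGreater also handle the case
// where Cand already won for a reason of its own.
enum Reason { NoCand, Stall, Cluster, NodeOrder };

struct Cand {
  int Stalls;
  bool Clustered;
  int NodeNum;
  Reason Why;
};

// Decisive as soon as the two values differ; on a tie, fall through.
static bool tryLess(int TryVal, int CandVal, Cand &TryCand, Reason Why) {
  if (TryVal == CandVal)
    return false;
  if (TryVal < CandVal)
    TryCand.Why = Why;   // TryCand wins for this reason
  return true;           // either way, stop the cascade
}

static void tryCandidate(const Cand &Best, Cand &TryCand) {
  if (tryLess(TryCand.Stalls, Best.Stalls, TryCand, Stall))
    return;
  // A "greater is better" check can reuse tryLess on negated booleans.
  if (tryLess(!TryCand.Clustered, !Best.Clustered, TryCand, Cluster))
    return;
  if (TryCand.NodeNum < Best.NodeNum)
    TryCand.Why = NodeOrder;   // fall back to original instruction order
}

int main() {
  Cand Best = {2, false, 7, NoCand};
  Cand Try  = {2, true,  9, NoCand};
  tryCandidate(Best, Try);
  std::cout << (Try.Why != NoCand ? "replace best" : "keep best") << '\n';
  return 0;
}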
3283 
3284 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3285  ReadyQueue &Q = Top.Available;
3286  for (SUnit *SU : Q) {
3287  SchedCandidate TryCand(Cand.Policy);
3288  TryCand.SU = SU;
3289  TryCand.AtTop = true;
3290  TryCand.initResourceDelta(DAG, SchedModel);
3291  tryCandidate(Cand, TryCand);
3292  if (TryCand.Reason != NoCand) {
3293  Cand.setBest(TryCand);
3294  LLVM_DEBUG(traceCandidate(Cand));
3295  }
3296  }
3297 }
3298 
3299 /// Pick the next node to schedule.
3300 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3301  if (DAG->top() == DAG->bottom()) {
3302  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3303  return nullptr;
3304  }
3305  SUnit *SU;
3306  do {
3307  SU = Top.pickOnlyChoice();
3308  if (SU) {
3309  tracePick(Only1, true);
3310  } else {
3311  CandPolicy NoPolicy;
3312  SchedCandidate TopCand(NoPolicy);
3313  // Set the top-down policy based on the state of the current top zone and
3314  // the instructions outside the zone, including the bottom zone.
3315  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3316  pickNodeFromQueue(TopCand);
3317  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3318  tracePick(TopCand);
3319  SU = TopCand.SU;
3320  }
3321  } while (SU->isScheduled);
3322 
3323  IsTopNode = true;
3324  Top.removeReady(SU);
3325 
3326  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3327  << *SU->getInstr());
3328  return SU;
3329 }
3330 
3331 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3332 /// scheduled/remaining flags in the DAG nodes.
3333 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3334  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3335  Top.bumpNode(SU);
3336 }
3337 
3338 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3339  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3340  /*RemoveKillFlags=*/true);
3341 }
3342 
3343 //===----------------------------------------------------------------------===//
3344 // ILP Scheduler. Currently for experimental analysis of heuristics.
3345 //===----------------------------------------------------------------------===//
3346 
3347 namespace {
3348 
3349 /// Order nodes by the ILP metric.
3350 struct ILPOrder {
3351  const SchedDFSResult *DFSResult = nullptr;
3352  const BitVector *ScheduledTrees = nullptr;
3353  bool MaximizeILP;
3354 
3355  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3356 
3357  /// Apply a less-than relation on node priority.
3358  ///
3359  /// (Return true if A comes after B in the Q.)
3360  bool operator()(const SUnit *A, const SUnit *B) const {
3361  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3362  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3363  if (SchedTreeA != SchedTreeB) {
3364  // Unscheduled trees have lower priority.
3365  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3366  return ScheduledTrees->test(SchedTreeB);
3367 
3368  // Trees with shallower connections have lower priority.
3369  if (DFSResult->getSubtreeLevel(SchedTreeA)
3370  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3371  return DFSResult->getSubtreeLevel(SchedTreeA)
3372  < DFSResult->getSubtreeLevel(SchedTreeB);
3373  }
3374  }
3375  if (MaximizeILP)
3376  return DFSResult->getILP(A) < DFSResult->getILP(B);
3377  else
3378  return DFSResult->getILP(A) > DFSResult->getILP(B);
3379  }
3380 };
3381 
3382 /// Schedule based on the ILP metric.
3383 class ILPScheduler : public MachineSchedStrategy {
3384  ScheduleDAGMILive *DAG = nullptr;
3385  ILPOrder Cmp;
3386 
3387  std::vector<SUnit*> ReadyQ;
3388 
3389 public:
3390  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3391 
3392  void initialize(ScheduleDAGMI *dag) override {
3393  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3394  DAG = static_cast<ScheduleDAGMILive*>(dag);
3395  DAG->computeDFSResult();
3396  Cmp.DFSResult = DAG->getDFSResult();
3397  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3398  ReadyQ.clear();
3399  }
3400 
3401  void registerRoots() override {
3402  // Restore the heap in ReadyQ with the updated DFS results.
3403  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3404  }
3405 
3406  /// Implement MachineSchedStrategy interface.
3407  /// -----------------------------------------
3408 
3409  /// Callback to select the highest priority node from the ready Q.
3410  SUnit *pickNode(bool &IsTopNode) override {
3411  if (ReadyQ.empty()) return nullptr;
3412  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3413  SUnit *SU = ReadyQ.back();
3414  ReadyQ.pop_back();
3415  IsTopNode = false;
3416  LLVM_DEBUG(dbgs() << "Pick node "
3417  << "SU(" << SU->NodeNum << ") "
3418  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3419  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3420  << " @"
3421  << DAG->getDFSResult()->getSubtreeLevel(
3422  DAG->getDFSResult()->getSubtreeID(SU))
3423  << '\n'
3424  << "Scheduling " << *SU->getInstr());
3425  return SU;
3426  }
3427 
3428  /// Scheduler callback to notify that a new subtree is scheduled.
3429  void scheduleTree(unsigned SubtreeID) override {
3430  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3431  }
3432 
3433  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3434  /// DFSResults, and resort the priority Q.
3435  void schedNode(SUnit *SU, bool IsTopNode) override {
3436  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3437  }
3438 
3439  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3440 
3441  void releaseBottomNode(SUnit *SU) override {
3442  ReadyQ.push_back(SU);
3443  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3444  }
3445 };
3446 
3447 } // end anonymous namespace
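ILPScheduler maintains ReadyQ as a binary heap over this comparator: releaseBottomNode() pushes with std::push_heap, pickNode() pops with std::pop_heap, and registerRoots()/scheduleTree() rebuild the heap with std::make_heap because the DFS results and scheduled-trees bitvector feeding the comparator change underneath it. A small self-contained sketch of the same heap discipline, with plain integer priorities standing in for the ILP metric (assumed example values):

#include <algorithm>
#include <iostream>
#include <vector>

// Same shape as ILPOrder: a less-than where "A < B" means A comes after B in
// the queue, so pop_heap hands out the highest-priority element first.
struct ByILP {
  bool operator()(int A, int B) const { return A < B; }
};

int main() {
  std::vector<int> ReadyQ;
  ByILP Cmp;

  // releaseBottomNode(): append and restore the heap property.
  for (int ILP : {3, 9, 1, 7}) {
    ReadyQ.push_back(ILP);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  // registerRoots()/scheduleTree(): the comparator's inputs changed, so
  // rebuild the heap wholesale.
  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  // pickNode(): move the best element to the back and pop it.
  while (!ReadyQ.empty()) {
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    std::cout << ReadyQ.back() << ' ';   // prints: 9 7 3 1
    ReadyQ.pop_back();
  }
  std::cout << '\n';
  return 0;
}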
3448 
3449 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3450  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3451 }
3452 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3453  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3454 }
3455 
3456 static MachineSchedRegistry ILPMaxRegistry(
3457  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3458 static MachineSchedRegistry ILPMinRegistry(
3459  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3460 
3461 //===----------------------------------------------------------------------===//
3462 // Machine Instruction Shuffler for Correctness Testing
3463 //===----------------------------------------------------------------------===//
3464 
3465 #ifndef NDEBUG
3466 namespace {
3467 
3468 /// Apply a less-than relation on the node order, which corresponds to the
3469 /// instruction order prior to scheduling. IsReverse implements greater-than.
3470 template<bool IsReverse>
3471 struct SUnitOrder {
3472  bool operator()(SUnit *A, SUnit *B) const {
3473  if (IsReverse)
3474  return A->NodeNum > B->NodeNum;
3475  else
3476  return A->NodeNum < B->NodeNum;
3477  }
3478 };
3479 
3480 /// Reorder instructions as much as possible.
3481 class InstructionShuffler : public MachineSchedStrategy {
3482  bool IsAlternating;
3483  bool IsTopDown;
3484 
3485  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3486  // gives nodes with a higher number higher priority causing the latest
3487  // instructions to be scheduled first.
3488  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3489  TopQ;
3490 
3491  // When scheduling bottom-up, use greater-than as the queue priority.
3492  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3493  BottomQ;
3494 
3495 public:
3496  InstructionShuffler(bool alternate, bool topdown)
3497  : IsAlternating(alternate), IsTopDown(topdown) {}
3498 
3499  void initialize(ScheduleDAGMI*) override {
3500  TopQ.clear();
3501  BottomQ.clear();
3502  }
3503 
3504  /// Implement MachineSchedStrategy interface.
3505  /// -----------------------------------------
3506 
3507  SUnit *pickNode(bool &IsTopNode) override {
3508  SUnit *SU;
3509  if (IsTopDown) {
3510  do {
3511  if (TopQ.empty()) return nullptr;
3512  SU = TopQ.top();
3513  TopQ.pop();
3514  } while (SU->isScheduled);
3515  IsTopNode = true;
3516  } else {
3517  do {
3518  if (BottomQ.empty()) return nullptr;
3519  SU = BottomQ.top();
3520  BottomQ.pop();
3521  } while (SU->isScheduled);
3522  IsTopNode = false;
3523  }
3524  if (IsAlternating)
3525  IsTopDown = !IsTopDown;
3526  return SU;
3527  }
3528 
3529  void schedNode(SUnit *SU, bool IsTopNode) override {}
3530 
3531  void releaseTopNode(SUnit *SU) override {
3532  TopQ.push(SU);
3533  }
3534  void releaseBottomNode(SUnit *SU) override {
3535  BottomQ.push(SU);
3536  }
3537 };
3538 
3539 } // end anonymous namespace
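Two details of the shuffler are easy to misread: a less-than comparator on NodeNum makes the PriorityQueue a max-heap, so the top-down queue hands out the highest-numbered (latest) instruction first, and when IsAlternating is set the direction flips after every pick. A standalone sketch with bare node numbers in std::priority_queue (the real queues also skip already-scheduled SUnits, which is omitted here):

#include <functional>
#include <iostream>
#include <queue>
#include <vector>

int main() {
  // Less-than comparator => max-heap: the highest (latest) node number first.
  std::priority_queue<int, std::vector<int>, std::less<int>> TopQ;
  // Greater-than comparator => min-heap: the lowest node number first.
  std::priority_queue<int, std::vector<int>, std::greater<int>> BottomQ;

  for (int N = 0; N < 6; ++N) {
    TopQ.push(N);
    BottomQ.push(N);
  }

  bool IsTopDown = true;       // corresponds to TopDown = !ForceBottomUp
  std::cout << "pick order:";
  for (int I = 0; I < 6; ++I) {
    int SU;
    if (IsTopDown) {
      SU = TopQ.top(); TopQ.pop();
    } else {
      SU = BottomQ.top(); BottomQ.pop();
    }
    std::cout << ' ' << SU;    // 5 0 4 1 3 2
    IsTopDown = !IsTopDown;    // IsAlternating: flip direction after each pick
  }
  std::cout << '\n';
  return 0;
}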
3540 
3541 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3542  bool Alternate = !ForceTopDown && !ForceBottomUp;
3543  bool TopDown = !ForceBottomUp;
3544  assert((TopDown || !ForceTopDown) &&
3545  "-misched-topdown incompatible with -misched-bottomup");
3546  return new ScheduleDAGMILive(
3547  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3548 }
3549 
3550 static MachineSchedRegistry ShufflerRegistry(
3551  "shuffle", "Shuffle machine instructions alternating directions",
3552  createInstructionShuffler);
3553 #endif // !NDEBUG
3554 
3555 //===----------------------------------------------------------------------===//
3556 // GraphWriter support for ScheduleDAGMILive.
3557 //===----------------------------------------------------------------------===//
3558 
3559 #ifndef NDEBUG
3560 namespace llvm {
3561 
3562 template<> struct GraphTraits<
3563  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3564 
3565 template<>
3566 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3567  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3568 
3569  static std::string getGraphName(const ScheduleDAG *G) {
3570  return G->MF.getName();
3571  }
3572 
3573  static bool renderGraphFromBottomUp() {
3574  return true;
3575  }
3576 
3577  static bool isNodeHidden(const SUnit *Node) {
3578  if (ViewMISchedCutoff == 0)
3579  return false;
3580  return (Node->Preds.size() > ViewMISchedCutoff
3581  || Node->Succs.size() > ViewMISchedCutoff);
3582  }
3583 
3584  /// If you want to override the dot attributes printed for a particular
3585  /// edge, override this method.
3586  static std::string getEdgeAttributes(const SUnit *Node,
3587  SUnitIterator EI,
3588  const ScheduleDAG *Graph) {
3589  if (EI.isArtificialDep())
3590  return "color=cyan,style=dashed";
3591  if (EI.isCtrlDep())
3592  return "color=blue,style=dashed";
3593  return "";
3594  }
3595 
3596  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3597  std::string Str;
3598  raw_string_ostream SS(Str);
3599  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3600  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3601  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3602  SS << "SU:" << SU->NodeNum;
3603  if (DFS)
3604  SS << " I:" << DFS->getNumInstrs(SU);
3605  return SS.str();
3606  }
3607 
3608  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3609  return G->getGraphNodeLabel(SU);
3610  }
3611 
3612  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3613  std::string Str("shape=Mrecord");
3614  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3615  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3616  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3617  if (DFS) {
3618  Str += ",style=filled,fillcolor=\"#";
3619  Str += DOT::getColorString(DFS->getSubtreeID(N));
3620  Str += '"';
3621  }
3622  return Str;
3623  }
3624 };
3625 
3626 } // end namespace llvm
3627 #endif // NDEBUG
3628 
3629 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3630 /// rendered using 'dot'.
3631 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3632 #ifndef NDEBUG
3633  ViewGraph(this, Name, false, Title);
3634 #else
3635  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3636  << "systems with Graphviz or gv!\n";
3637 #endif // NDEBUG
3638 }
3639 
3640 /// Out-of-line implementation with no arguments is handy for gdb.
3641 void ScheduleDAGMI::viewGraph() {
3642  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3643 }