MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <limits>
64 #include <memory>
65 #include <string>
66 #include <tuple>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "machine-scheduler"
73 
74 namespace llvm {
75 
76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
77  cl::desc("Force top-down list scheduling"));
78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
79  cl::desc("Force bottom-up list scheduling"));
80 cl::opt<bool>
81 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
82  cl::desc("Print critical path length to stdout"));
83 
84 } // end namespace llvm
85 
86 #ifndef NDEBUG
87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
88  cl::desc("Pop up a window to show MISched dags after they are processed"));
89 
90 /// In some situations a few uninteresting nodes depend on nearly all other
91 /// nodes in the graph; provide a cutoff to hide them.
92 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
93  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
94 
95 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
96  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
97 
98 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
99  cl::desc("Only schedule this function"));
100 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
101  cl::desc("Only schedule this MBB#"));
102 #else
103 static bool ViewMISchedDAGs = false;
104 #endif // NDEBUG
105 
106 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
107 /// size of the ready lists.
108 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
109  cl::desc("Limit ready list to N instructions"), cl::init(256));
110 
111 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
112  cl::desc("Enable register pressure scheduling."), cl::init(true));
113 
114 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
115  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
116 
117 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
118  cl::desc("Enable memop clustering."),
119  cl::init(true));
120 
121 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
122  cl::desc("Verify machine instrs before and after machine scheduling"));
123 
124 // DAG subtrees must have at least this many nodes.
125 static const unsigned MinSubtreeSize = 8;
126 
127 // Pin the vtables to this file.
128 void MachineSchedStrategy::anchor() {}
129 
130 void ScheduleDAGMutation::anchor() {}
131 
132 //===----------------------------------------------------------------------===//
133 // Machine Instruction Scheduling Pass and Registry
134 //===----------------------------------------------------------------------===//
135 
136 MachineSchedContext::MachineSchedContext() {
137  RegClassInfo = new RegisterClassInfo();
138 }
139 
140 MachineSchedContext::~MachineSchedContext() {
141  delete RegClassInfo;
142 }
143 
144 namespace {
145 
146 /// Base class for machine schedulers that can run at any point.
147 class MachineSchedulerBase : public MachineSchedContext,
148  public MachineFunctionPass {
149 public:
150  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
151 
152  void print(raw_ostream &O, const Module* = nullptr) const override;
153 
154 protected:
155  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
156 };
157 
158 /// MachineScheduler runs after coalescing and before register allocation.
159 class MachineScheduler : public MachineSchedulerBase {
160 public:
161  MachineScheduler();
162 
163  void getAnalysisUsage(AnalysisUsage &AU) const override;
164 
165  bool runOnMachineFunction(MachineFunction&) override;
166 
167  static char ID; // Class identification, replacement for typeinfo
168 
169 protected:
170  ScheduleDAGInstrs *createMachineScheduler();
171 };
172 
173 /// PostMachineScheduler runs shortly before code emission.
174 class PostMachineScheduler : public MachineSchedulerBase {
175 public:
176  PostMachineScheduler();
177 
178  void getAnalysisUsage(AnalysisUsage &AU) const override;
179 
180  bool runOnMachineFunction(MachineFunction&) override;
181 
182  static char ID; // Class identification, replacement for typeinfo
183 
184 protected:
185  ScheduleDAGInstrs *createPostMachineScheduler();
186 };
187 
188 } // end anonymous namespace
189 
190 char MachineScheduler::ID = 0;
191 
192 char &llvm::MachineSchedulerID = MachineScheduler::ID;
193 
194 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
195  "Machine Instruction Scheduler", false, false)
200 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
201  "Machine Instruction Scheduler", false, false)
202 
203 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
204  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
205 }
206 
207 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
208  AU.setPreservesCFG();
213  AU.addRequired<SlotIndexes>();
218 }
219 
220 char PostMachineScheduler::ID = 0;
221 
222 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
223 
224 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
225  "PostRA Machine Instruction Scheduler", false, false)
226 
227 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
228  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
229 }
230 
231 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
232  AU.setPreservesCFG();
237 }
238 
239 MachinePassRegistry MachineSchedRegistry::Registry;
240 
241 /// A dummy default scheduler factory indicates whether the scheduler
242 /// is overridden on the command line.
243 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
244  return nullptr;
245 }
246 
247 /// MachineSchedOpt allows command line selection of the scheduler.
248 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
249  RegisterPassParser<MachineSchedRegistry>>
250 MachineSchedOpt("misched",
251  cl::init(&useDefaultMachineSched), cl::Hidden,
252  cl::desc("Machine instruction scheduler to use"));
253 
254 static MachineSchedRegistry
255 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
256  useDefaultMachineSched);
257 
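// Illustration (not part of this file): out-of-tree or target code can plug a
// custom strategy into the -misched=<name> option by defining a factory that
// builds a ScheduleDAGInstrs and registering it with MachineSchedRegistry.
// The names createMyCustomSched, MySchedRegistry, and "my-sched" below are
// made up; only the registry mechanism itself comes from this file.
//
// static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
//   // Reuse the live-interval-preserving DAG with the generic strategy; a
//   // real client would usually substitute its own MachineSchedStrategy.
//   return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
// }
// static MachineSchedRegistry
// MySchedRegistry("my-sched", "Illustrative custom scheduler registration.",
//                 createMyCustomSched);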
259  "enable-misched",
260  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
261  cl::Hidden);
262 
264  "enable-post-misched",
265  cl::desc("Enable the post-ra machine instruction scheduling pass."),
266  cl::init(true), cl::Hidden);
267 
268 /// Decrement this iterator until reaching the top or a non-debug instr.
269 static MachineBasicBlock::const_iterator
270 priorNonDebug(MachineBasicBlock::const_iterator I,
271  MachineBasicBlock::const_iterator Beg) {
272  assert(I != Beg && "reached the top of the region, cannot decrement");
273  while (--I != Beg) {
274  if (!I->isDebugValue())
275  break;
276  }
277  return I;
278 }
279 
280 /// Non-const version.
286 }
287 
288 /// If this iterator is a debug value, increment until reaching the End or a
289 /// non-debug instruction.
290 static MachineBasicBlock::const_iterator
291 nextIfDebug(MachineBasicBlock::const_iterator I,
292  MachineBasicBlock::const_iterator End) {
293  for(; I != End; ++I) {
294  if (!I->isDebugValue())
295  break;
296  }
297  return I;
298 }
299 
300 /// Non-const version.
306 }
307 
308 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
309 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
310  // Select the scheduler, or set the default.
311  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
312  if (Ctor != useDefaultMachineSched)
313  return Ctor(this);
314 
315  // Get the default scheduler set by the target for this function.
316  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
317  if (Scheduler)
318  return Scheduler;
319 
320  // Default to GenericScheduler.
321  return createGenericSchedLive(this);
322 }
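// Illustration (not from this file): a target usually supplies its pre-RA
// scheduler by overriding TargetPassConfig::createMachineScheduler in its
// TargetMachine code, roughly as sketched below. MyPassConfig is a made-up
// class name; createGenericSchedLive and the mutation factory are real APIs
// declared in MachineScheduler.h.
//
// ScheduleDAGInstrs *
// MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   // Attach any target-chosen DAG mutations before handing back the DAG.
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   return DAG;
// }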
323 
324 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
325 /// the caller. We don't have a command line option to override the postRA
326 /// scheduler. The Target must configure it.
327 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
328  // Get the postRA scheduler set by the target for this function.
329  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
330  if (Scheduler)
331  return Scheduler;
332 
333  // Default to GenericScheduler.
334  return createGenericSchedPostRA(this);
335 }
336 
337 /// Top-level MachineScheduler pass driver.
338 ///
339 /// Visit blocks in function order. Divide each block into scheduling regions
340 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
341 /// consistent with the DAG builder, which traverses the interior of the
342 /// scheduling regions bottom-up.
343 ///
344 /// This design avoids exposing scheduling boundaries to the DAG builder,
345 /// simplifying the DAG builder's support for "special" target instructions.
346 /// At the same time the design allows target schedulers to operate across
347 /// scheduling boundaries, for example to bundle the boundary instructions
348 /// without reordering them. This creates complexity, because the target
349 /// scheduler must update the RegionBegin and RegionEnd positions cached by
350 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
351 /// design would be to split blocks at scheduling boundaries, but LLVM has a
352 /// general bias against block splitting purely for implementation simplicity.
353 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
354  if (skipFunction(mf.getFunction()))
355  return false;
356 
357  if (EnableMachineSched.getNumOccurrences()) {
358  if (!EnableMachineSched)
359  return false;
360  } else if (!mf.getSubtarget().enableMachineScheduler())
361  return false;
362 
363  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
364 
365  // Initialize the context of the pass.
366  MF = &mf;
367  MLI = &getAnalysis<MachineLoopInfo>();
368  MDT = &getAnalysis<MachineDominatorTree>();
369  PassConfig = &getAnalysis<TargetPassConfig>();
370  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
371 
372  LIS = &getAnalysis<LiveIntervals>();
373 
374  if (VerifyScheduling) {
375  DEBUG(LIS->dump());
376  MF->verify(this, "Before machine scheduling.");
377  }
378  RegClassInfo->runOnMachineFunction(*MF);
379 
380  // Instantiate the selected scheduler for this target, function, and
381  // optimization level.
382  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
383  scheduleRegions(*Scheduler, false);
384 
385  DEBUG(LIS->dump());
386  if (VerifyScheduling)
387  MF->verify(this, "After machine scheduling.");
388  return true;
389 }
390 
391 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
392  if (skipFunction(mf.getFunction()))
393  return false;
394 
395  if (EnablePostRAMachineSched.getNumOccurrences()) {
396  if (!EnablePostRAMachineSched)
397  return false;
398  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
399  DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
400  return false;
401  }
402  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
403 
404  // Initialize the context of the pass.
405  MF = &mf;
406  MLI = &getAnalysis<MachineLoopInfo>();
407  PassConfig = &getAnalysis<TargetPassConfig>();
408 
409  if (VerifyScheduling)
410  MF->verify(this, "Before post machine scheduling.");
411 
412  // Instantiate the selected scheduler for this target, function, and
413  // optimization level.
414  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
415  scheduleRegions(*Scheduler, true);
416 
417  if (VerifyScheduling)
418  MF->verify(this, "After post machine scheduling.");
419  return true;
420 }
421 
422 /// Return true if the given instruction should not be included in a scheduling
423 /// region.
424 ///
425 /// MachineScheduler does not currently support scheduling across calls. To
426 /// handle calls, the DAG builder needs to be modified to create register
427 /// anti/output dependencies on the registers clobbered by the call's regmask
428 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
429 /// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
430 /// the boundary, but there would be no benefit to postRA scheduling across
431 /// calls this late anyway.
432 static bool isSchedBoundary(MachineInstr *MI,
433  MachineBasicBlock *MBB,
434  MachineFunction *MF,
435  const TargetInstrInfo *TII) {
436  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
437 }
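// Illustration (not from this file): a target can widen these boundaries by
// overriding TargetInstrInfo::isSchedulingBoundary. MyTargetInstrInfo and the
// MyTarget::WRITE_MODE_REG opcode below are hypothetical; the signature is the
// one used by the call above.
//
// bool MyTargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
//                                              const MachineBasicBlock *MBB,
//                                              const MachineFunction &MF) const {
//   // Keep the generic boundaries (labels, terminators, and so on).
//   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
//     return true;
//   // Additionally refuse to move instructions across a mode-switching write.
//   return MI.getOpcode() == MyTarget::WRITE_MODE_REG;
// }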
438 
439 /// A region of an MBB for scheduling.
440 namespace {
441 struct SchedRegion {
442  /// RegionBegin is the first instruction in the scheduling region, and
443  /// RegionEnd is either MBB->end() or the scheduling boundary after the
444  /// last instruction in the scheduling region. These iterators cannot refer
445  /// to instructions outside of the identified scheduling region because
446  /// those may be reordered before scheduling this region.
447  MachineBasicBlock::iterator RegionBegin;
448  MachineBasicBlock::iterator RegionEnd;
449  unsigned NumRegionInstrs;
450 
451  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
452  unsigned N) :
453  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
454 };
455 } // end anonymous namespace
456 
457 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
458 
459 static void
460 getSchedRegions(MachineBasicBlock *MBB,
461  MBBRegionsVector &Regions,
462  bool RegionsTopDown) {
463  MachineFunction *MF = MBB->getParent();
464  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
465 
466  MachineBasicBlock::iterator I = nullptr;
467  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
468  RegionEnd != MBB->begin(); RegionEnd = I) {
469 
470  // Avoid decrementing RegionEnd for blocks with no terminator.
471  if (RegionEnd != MBB->end() ||
472  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
473  --RegionEnd;
474  }
475 
476  // The next region starts above the previous region. Look backward in the
477  // instruction stream until we find the nearest boundary.
478  unsigned NumRegionInstrs = 0;
479  I = RegionEnd;
480  for (;I != MBB->begin(); --I) {
481  MachineInstr &MI = *std::prev(I);
482  if (isSchedBoundary(&MI, &*MBB, MF, TII))
483  break;
484  if (!MI.isDebugValue())
485  // MBB::size() uses instr_iterator to count. Here we need a bundle to
486  // count as a single instruction.
487  ++NumRegionInstrs;
488  }
489 
490  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
491  }
492 
493  if (RegionsTopDown)
494  std::reverse(Regions.begin(), Regions.end());
495 }
496 
497 /// Main driver for both MachineScheduler and PostMachineScheduler.
498 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
499  bool FixKillFlags) {
500  // Visit all machine basic blocks.
501  //
502  // TODO: Visit blocks in global postorder or postorder within the bottom-up
503  // loop tree. Then we can optionally compute global RegPressure.
504  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
505  MBB != MBBEnd; ++MBB) {
506 
507  Scheduler.startBlock(&*MBB);
508 
509 #ifndef NDEBUG
510  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
511  continue;
512  if (SchedOnlyBlock.getNumOccurrences()
513  && (int)SchedOnlyBlock != MBB->getNumber())
514  continue;
515 #endif
516 
517  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
518  // points to the scheduling boundary at the bottom of the region. The DAG
519  // does not include RegionEnd, but the region does (i.e. the next
520  // RegionEnd is above the previous RegionBegin). If the current block has
521  // no terminator then RegionEnd == MBB->end() for the bottom region.
522  //
523  // All the regions of MBB are first found and stored in MBBRegions, which
524 // will be processed top-down within the MBB if RegionsTopDown is true.
525  //
526  // The Scheduler may insert instructions during either schedule() or
527  // exitRegion(), even for empty regions. So the local iterators 'I' and
528  // 'RegionEnd' are invalid across these calls. Instructions must not be
529 // added to regions other than the current one without updating MBBRegions.
530 
531  MBBRegionsVector MBBRegions;
532  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
533  for (MBBRegionsVector::iterator R = MBBRegions.begin();
534  R != MBBRegions.end(); ++R) {
535  MachineBasicBlock::iterator I = R->RegionBegin;
536  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
537  unsigned NumRegionInstrs = R->NumRegionInstrs;
538 
539  // Notify the scheduler of the region, even if we may skip scheduling
540  // it. Perhaps it still needs to be bundled.
541  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
542 
543  // Skip empty scheduling regions (0 or 1 schedulable instructions).
544  if (I == RegionEnd || I == std::prev(RegionEnd)) {
545  // Close the current region. Bundle the terminator if needed.
546  // This invalidates 'RegionEnd' and 'I'.
547  Scheduler.exitRegion();
548  continue;
549  }
550  DEBUG(dbgs() << "********** MI Scheduling **********\n");
551  DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
552  << MBB->getName() << "\n From: " << *I << " To: ";
553  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
554  else dbgs() << "End";
555  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
556  if (DumpCriticalPathLength) {
557  errs() << MF->getName();
558  errs() << ":%bb. " << MBB->getNumber();
559  errs() << " " << MBB->getName() << " \n";
560  }
561 
562  // Schedule a region: possibly reorder instructions.
563  // This invalidates the original region iterators.
564  Scheduler.schedule();
565 
566  // Close the current region.
567  Scheduler.exitRegion();
568  }
569  Scheduler.finishBlock();
570  // FIXME: Ideally, no further passes should rely on kill flags. However,
571  // thumb2 size reduction is currently an exception, so the PostMIScheduler
572  // needs to do this.
573  if (FixKillFlags)
574  Scheduler.fixupKills(*MBB);
575  }
576  Scheduler.finalizeSchedule();
577 }
578 
579 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
580  // unimplemented
581 }
582 
583 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
585  dbgs() << "Queue " << Name << ": ";
586  for (const SUnit *SU : Queue)
587  dbgs() << SU->NodeNum << " ";
588  dbgs() << "\n";
589 }
590 #endif
591 
592 //===----------------------------------------------------------------------===//
593 // ScheduleDAGMI - Basic machine instruction scheduling. This is
594 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
595 // virtual registers.
596 //===----------------------------------------------------------------------===//
597 
598 // Provide a vtable anchor.
599 ScheduleDAGMI::~ScheduleDAGMI() = default;
600 
601 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
602  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
603 }
604 
605 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
606  if (SuccSU != &ExitSU) {
607  // Do not use WillCreateCycle, it assumes SD scheduling.
608  // If Pred is reachable from Succ, then the edge creates a cycle.
609  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
610  return false;
611  Topo.AddPred(SuccSU, PredDep.getSUnit());
612  }
613  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
614  // Return true regardless of whether a new edge needed to be inserted.
615  return true;
616 }
617 
618 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
619 /// NumPredsLeft reaches zero, release the successor node.
620 ///
621 /// FIXME: Adjust SuccSU height based on MinLatency.
622 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
623  SUnit *SuccSU = SuccEdge->getSUnit();
624 
625  if (SuccEdge->isWeak()) {
626  --SuccSU->WeakPredsLeft;
627  if (SuccEdge->isCluster())
628  NextClusterSucc = SuccSU;
629  return;
630  }
631 #ifndef NDEBUG
632  if (SuccSU->NumPredsLeft == 0) {
633  dbgs() << "*** Scheduling failed! ***\n";
634  SuccSU->dump(this);
635  dbgs() << " has been released too many times!\n";
636  llvm_unreachable(nullptr);
637  }
638 #endif
639  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
640  // CurrCycle may have advanced since then.
641  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
642  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
643 
644  --SuccSU->NumPredsLeft;
645  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
646  SchedImpl->releaseTopNode(SuccSU);
647 }
648 
649 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
650 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
651  for (SDep &Succ : SU->Succs)
652  releaseSucc(SU, &Succ);
653 }
654 
655 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
656 /// NumSuccsLeft reaches zero, release the predecessor node.
657 ///
658 /// FIXME: Adjust PredSU height based on MinLatency.
659 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
660  SUnit *PredSU = PredEdge->getSUnit();
661 
662  if (PredEdge->isWeak()) {
663  --PredSU->WeakSuccsLeft;
664  if (PredEdge->isCluster())
665  NextClusterPred = PredSU;
666  return;
667  }
668 #ifndef NDEBUG
669  if (PredSU->NumSuccsLeft == 0) {
670  dbgs() << "*** Scheduling failed! ***\n";
671  PredSU->dump(this);
672  dbgs() << " has been released too many times!\n";
673  llvm_unreachable(nullptr);
674  }
675 #endif
676  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
677  // CurrCycle may have advanced since then.
678  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
679  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
680 
681  --PredSU->NumSuccsLeft;
682  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
683  SchedImpl->releaseBottomNode(PredSU);
684 }
685 
686 /// releasePredecessors - Call releasePred on each of SU's predecessors.
687 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
688  for (SDep &Pred : SU->Preds)
689  releasePred(SU, &Pred);
690 }
691 
692 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
693  ScheduleDAGInstrs::startBlock(bb);
694  SchedImpl->enterMBB(bb);
695 }
696 
697 void ScheduleDAGMI::finishBlock() {
698  SchedImpl->leaveMBB();
699  ScheduleDAGInstrs::finishBlock();
700 }
701 
702 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
703 /// crossing a scheduling boundary. [begin, end) includes all instructions in
704 /// the region, including the boundary itself and single-instruction regions
705 /// that don't get scheduled.
706 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
707  MachineBasicBlock::iterator begin,
708  MachineBasicBlock::iterator end,
709  unsigned regioninstrs)
710 {
711  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
712 
713  SchedImpl->initPolicy(begin, end, regioninstrs);
714 }
715 
716 /// This is normally called from the main scheduler loop but may also be invoked
717 /// by the scheduling strategy to perform additional code motion.
718 void ScheduleDAGMI::moveInstruction(
719  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
720  // Advance RegionBegin if the first instruction moves down.
721  if (&*RegionBegin == MI)
722  ++RegionBegin;
723 
724  // Update the instruction stream.
725  BB->splice(InsertPos, BB, MI);
726 
727  // Update LiveIntervals
728  if (LIS)
729  LIS->handleMove(*MI, /*UpdateFlags=*/true);
730 
731  // Recede RegionBegin if an instruction moves above the first.
732  if (RegionBegin == InsertPos)
733  RegionBegin = MI;
734 }
735 
736 bool ScheduleDAGMI::checkSchedLimit() {
737 #ifndef NDEBUG
738  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
739  CurrentTop = CurrentBottom;
740  return false;
741  }
742  ++NumInstrsScheduled;
743 #endif
744  return true;
745 }
746 
747 /// Per-region scheduling driver, called back from
748 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
749 /// does not consider liveness or register pressure. It is useful for PostRA
750 /// scheduling and potentially other custom schedulers.
751 void ScheduleDAGMI::schedule() {
752  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
753  DEBUG(SchedImpl->dumpPolicy());
754 
755  // Build the DAG.
756  buildSchedGraph(AA);
757 
758  Topo.InitDAGTopologicalSorting();
759 
760  postprocessDAG();
761 
762  SmallVector<SUnit*, 8> TopRoots, BotRoots;
763  findRootsAndBiasEdges(TopRoots, BotRoots);
764 
765  // Initialize the strategy before modifying the DAG.
766  // This may initialize a DFSResult to be used for queue priority.
767  SchedImpl->initialize(this);
768 
769  DEBUG(
770  if (EntrySU.getInstr() != nullptr)
771  EntrySU.dumpAll(this);
772  for (const SUnit &SU : SUnits)
773  SU.dumpAll(this);
774  if (ExitSU.getInstr() != nullptr)
775  ExitSU.dumpAll(this);
776  );
777  if (ViewMISchedDAGs) viewGraph();
778 
779  // Initialize ready queues now that the DAG and priority data are finalized.
780  initQueues(TopRoots, BotRoots);
781 
782  bool IsTopNode = false;
783  while (true) {
784  DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
785  SUnit *SU = SchedImpl->pickNode(IsTopNode);
786  if (!SU) break;
787 
788  assert(!SU->isScheduled && "Node already scheduled");
789  if (!checkSchedLimit())
790  break;
791 
792  MachineInstr *MI = SU->getInstr();
793  if (IsTopNode) {
794  assert(SU->isTopReady() && "node still has unscheduled dependencies");
795  if (&*CurrentTop == MI)
796  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
797  else
798  moveInstruction(MI, CurrentTop);
799  } else {
800  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
801  MachineBasicBlock::iterator priorII =
802  priorNonDebug(CurrentBottom, CurrentTop);
803  if (&*priorII == MI)
804  CurrentBottom = priorII;
805  else {
806  if (&*CurrentTop == MI)
807  CurrentTop = nextIfDebug(++CurrentTop, priorII);
808  moveInstruction(MI, CurrentBottom);
809  CurrentBottom = MI;
810  }
811  }
812  // Notify the scheduling strategy before updating the DAG.
813  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
814  // runs, it can then use the accurate ReadyCycle time to determine whether
815  // newly released nodes can move to the readyQ.
816  SchedImpl->schedNode(SU, IsTopNode);
817 
818  updateQueues(SU, IsTopNode);
819  }
820  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
821 
822  placeDebugValues();
823 
824  DEBUG({
825  dbgs() << "*** Final schedule for "
826  << printMBBReference(*begin()->getParent()) << " ***\n";
827  dumpSchedule();
828  dbgs() << '\n';
829  });
830 }
831 
832 /// Apply each ScheduleDAGMutation step in order.
833 void ScheduleDAGMI::postprocessDAG() {
834  for (auto &m : Mutations)
835  m->apply(this);
836 }
837 
838 void ScheduleDAGMI::
839 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
840  SmallVectorImpl<SUnit*> &BotRoots) {
841  for (SUnit &SU : SUnits) {
842  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
843 
844  // Order predecessors so DFSResult follows the critical path.
845  SU.biasCriticalPath();
846 
847  // A SUnit is ready to top schedule if it has no predecessors.
848  if (!SU.NumPredsLeft)
849  TopRoots.push_back(&SU);
850  // A SUnit is ready to bottom schedule if it has no successors.
851  if (!SU.NumSuccsLeft)
852  BotRoots.push_back(&SU);
853  }
854  ExitSU.biasCriticalPath();
855 }
856 
857 /// Identify DAG roots and setup scheduler queues.
858 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
859  ArrayRef<SUnit*> BotRoots) {
860  NextClusterSucc = nullptr;
861  NextClusterPred = nullptr;
862 
863  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
864  //
865  // Nodes with unreleased weak edges can still be roots.
866  // Release top roots in forward order.
867  for (SUnit *SU : TopRoots)
868  SchedImpl->releaseTopNode(SU);
869 
870  // Release bottom roots in reverse order so the higher priority nodes appear
871  // first. This is more natural and slightly more efficient.
872  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
873  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
874  SchedImpl->releaseBottomNode(*I);
875  }
876 
877  releaseSuccessors(&EntrySU);
878  releasePredecessors(&ExitSU);
879 
880  SchedImpl->registerRoots();
881 
882  // Advance past initial DebugValues.
883  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
884  CurrentBottom = RegionEnd;
885 }
886 
887 /// Update scheduler queues after scheduling an instruction.
888 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
889  // Release dependent instructions for scheduling.
890  if (IsTopNode)
891  releaseSuccessors(SU);
892  else
893  releasePredecessors(SU);
894 
895  SU->isScheduled = true;
896 }
897 
898 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
899 void ScheduleDAGMI::placeDebugValues() {
900  // If first instruction was a DBG_VALUE then put it back.
901  if (FirstDbgValue) {
902  BB->splice(RegionBegin, BB, FirstDbgValue);
903  RegionBegin = FirstDbgValue;
904  }
905 
906  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
907  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
908  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
909  MachineInstr *DbgValue = P.first;
910  MachineBasicBlock::iterator OrigPrevMI = P.second;
911  if (&*RegionBegin == DbgValue)
912  ++RegionBegin;
913  BB->splice(++OrigPrevMI, BB, DbgValue);
914  if (OrigPrevMI == std::prev(RegionEnd))
915  RegionEnd = DbgValue;
916  }
917  DbgValues.clear();
918  FirstDbgValue = nullptr;
919 }
920 
921 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
922 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
923  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
924  if (SUnit *SU = getSUnit(&(*MI)))
925  SU->dump(this);
926  else
927  dbgs() << "Missing SUnit\n";
928  }
929 }
930 #endif
931 
932 //===----------------------------------------------------------------------===//
933 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
934 // preservation.
935 //===----------------------------------------------------------------------===//
936 
937 ScheduleDAGMILive::~ScheduleDAGMILive() {
938  delete DFSResult;
939 }
940 
941 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
942  const MachineInstr &MI = *SU.getInstr();
943  for (const MachineOperand &MO : MI.operands()) {
944  if (!MO.isReg())
945  continue;
946  if (!MO.readsReg())
947  continue;
948  if (TrackLaneMasks && !MO.isUse())
949  continue;
950 
951  unsigned Reg = MO.getReg();
953  continue;
954 
955  // Ignore re-defs.
956  if (TrackLaneMasks) {
957  bool FoundDef = false;
958  for (const MachineOperand &MO2 : MI.operands()) {
959  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
960  FoundDef = true;
961  break;
962  }
963  }
964  if (FoundDef)
965  continue;
966  }
967 
968  // Record this local VReg use.
969  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
970  for (; UI != VRegUses.end(); ++UI) {
971  if (UI->SU == &SU)
972  break;
973  }
974  if (UI == VRegUses.end())
975  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
976  }
977 }
978 
979 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
980 /// crossing a scheduling boundary. [begin, end) includes all instructions in
981 /// the region, including the boundary itself and single-instruction regions
982 /// that don't get scheduled.
983 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
984  MachineBasicBlock::iterator begin,
985  MachineBasicBlock::iterator end,
986  unsigned regioninstrs)
987 {
988  // ScheduleDAGMI initializes SchedImpl's per-region policy.
989  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
990 
991  // For convenience remember the end of the liveness region.
992  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
993 
994  SUPressureDiffs.clear();
995 
996  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
997  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
998 
999  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1000  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1001 }
1002 
1003 // Setup the register pressure trackers for the top and bottom scheduled
1004 // regions.
1005 void ScheduleDAGMILive::initRegPressure() {
1006  VRegUses.clear();
1007  VRegUses.setUniverse(MRI.getNumVirtRegs());
1008  for (SUnit &SU : SUnits)
1009  collectVRegUses(SU);
1010 
1011  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1012  ShouldTrackLaneMasks, false);
1013  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1014  ShouldTrackLaneMasks, false);
1015 
1016  // Close the RPTracker to finalize live ins.
1017  RPTracker.closeRegion();
1018 
1019  DEBUG(RPTracker.dump());
1020 
1021  // Initialize the live ins and live outs.
1022  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1023  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1024 
1025  // Close one end of the tracker so we can call
1026  // getMaxUpward/DownwardPressureDelta before advancing across any
1027  // instructions. This converts currently live regs into live ins/outs.
1028  TopRPTracker.closeTop();
1029  BotRPTracker.closeBottom();
1030 
1031  BotRPTracker.initLiveThru(RPTracker);
1032  if (!BotRPTracker.getLiveThru().empty()) {
1033  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1034  DEBUG(dbgs() << "Live Thru: ";
1035  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1036  };
1037 
1038  // For each live out vreg reduce the pressure change associated with other
1039  // uses of the same vreg below the live-out reaching def.
1040  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1041 
1042  // Account for liveness generated by the region boundary.
1043  if (LiveRegionEnd != RegionEnd) {
1044  SmallVector<RegisterMaskPair, 8> LiveUses;
1045  BotRPTracker.recede(&LiveUses);
1046  updatePressureDiffs(LiveUses);
1047  }
1048 
1049  DEBUG(
1050  dbgs() << "Top Pressure:\n";
1051  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1052  dbgs() << "Bottom Pressure:\n";
1053  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1054  );
1055 
1056  assert((BotRPTracker.getPos() == RegionEnd ||
1057  (RegionEnd->isDebugValue() &&
1058  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1059  "Can't find the region bottom");
1060 
1061  // Cache the list of excess pressure sets in this region. This will also track
1062  // the max pressure in the scheduled code for these sets.
1063  RegionCriticalPSets.clear();
1064  const std::vector<unsigned> &RegionPressure =
1065  RPTracker.getPressure().MaxSetPressure;
1066  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1067  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1068  if (RegionPressure[i] > Limit) {
1069  DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1070  << " Limit " << Limit
1071  << " Actual " << RegionPressure[i] << "\n");
1072  RegionCriticalPSets.push_back(PressureChange(i));
1073  }
1074  }
1075  DEBUG(dbgs() << "Excess PSets: ";
1076  for (const PressureChange &RCPS : RegionCriticalPSets)
1077  dbgs() << TRI->getRegPressureSetName(
1078  RCPS.getPSet()) << " ";
1079  dbgs() << "\n");
1080 }
1081 
1082 void ScheduleDAGMILive::
1083 updateScheduledPressure(const SUnit *SU,
1084  const std::vector<unsigned> &NewMaxPressure) {
1085  const PressureDiff &PDiff = getPressureDiff(SU);
1086  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1087  for (const PressureChange &PC : PDiff) {
1088  if (!PC.isValid())
1089  break;
1090  unsigned ID = PC.getPSet();
1091  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1092  ++CritIdx;
1093  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1094  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1095  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1096  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1097  }
1098  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1099  if (NewMaxPressure[ID] >= Limit - 2) {
1100  DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1101  << NewMaxPressure[ID]
1102  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1103  << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1104  }
1105  }
1106 }
1107 
1108 /// Update the PressureDiff array for liveness after scheduling this
1109 /// instruction.
1110 void ScheduleDAGMILive::updatePressureDiffs(
1111  ArrayRef<RegisterMaskPair> LiveUses) {
1112  for (const RegisterMaskPair &P : LiveUses) {
1113  unsigned Reg = P.RegUnit;
1114  /// FIXME: Currently assuming single-use physregs.
1115  if (!TRI->isVirtualRegister(Reg))
1116  continue;
1117 
1118  if (ShouldTrackLaneMasks) {
1119  // If the register has just become live then other uses won't change
1120  // this fact anymore => decrement pressure.
1121  // If the register has just become dead then other uses make it come
1122  // back to life => increment pressure.
1123  bool Decrement = P.LaneMask.any();
1124 
1125  for (const VReg2SUnit &V2SU
1126  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1127  SUnit &SU = *V2SU.SU;
1128  if (SU.isScheduled || &SU == &ExitSU)
1129  continue;
1130 
1131  PressureDiff &PDiff = getPressureDiff(&SU);
1132  PDiff.addPressureChange(Reg, Decrement, &MRI);
1133  DEBUG(
1134  dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1135  << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1136  << ' ' << *SU.getInstr();
1137  dbgs() << " to ";
1138  PDiff.dump(*TRI);
1139  );
1140  }
1141  } else {
1142  assert(P.LaneMask.any());
1143  DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1144  // This may be called before CurrentBottom has been initialized. However,
1145  // BotRPTracker must have a valid position. We want the value live into the
1146  // instruction or live out of the block, so ask for the previous
1147  // instruction's live-out.
1148  const LiveInterval &LI = LIS->getInterval(Reg);
1149  VNInfo *VNI;
1150  MachineBasicBlock::const_iterator I =
1151  nextIfDebug(BotRPTracker.getPos(), BB->end());
1152  if (I == BB->end())
1153  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1154  else {
1155  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1156  VNI = LRQ.valueIn();
1157  }
1158  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1159  assert(VNI && "No live value at use.");
1160  for (const VReg2SUnit &V2SU
1161  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1162  SUnit *SU = V2SU.SU;
1163  // If this use comes before the reaching def, it cannot be a last use,
1164  // so decrease its pressure change.
1165  if (!SU->isScheduled && SU != &ExitSU) {
1166  LiveQueryResult LRQ =
1167  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1168  if (LRQ.valueIn() == VNI) {
1169  PressureDiff &PDiff = getPressureDiff(SU);
1170  PDiff.addPressureChange(Reg, true, &MRI);
1171  DEBUG(
1172  dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1173  << *SU->getInstr();
1174  dbgs() << " to ";
1175  PDiff.dump(*TRI);
1176  );
1177  }
1178  }
1179  }
1180  }
1181  }
1182 }
1183 
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196  DEBUG(SchedImpl->dumpPolicy());
1197  buildDAGWithRegPressure();
1198 
1199  Topo.InitDAGTopologicalSorting();
1200 
1201  postprocessDAG();
1202 
1203  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1204  findRootsAndBiasEdges(TopRoots, BotRoots);
1205 
1206  // Initialize the strategy before modifying the DAG.
1207  // This may initialize a DFSResult to be used for queue priority.
1208  SchedImpl->initialize(this);
1209 
1210  DEBUG(
1211  if (EntrySU.getInstr() != nullptr)
1212  EntrySU.dumpAll(this);
1213  for (const SUnit &SU : SUnits) {
1214  SU.dumpAll(this);
1215  if (ShouldTrackPressure) {
1216  dbgs() << " Pressure Diff : ";
1217  getPressureDiff(&SU).dump(*TRI);
1218  }
1219  dbgs() << " Single Issue : ";
1220  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1221  SchedModel.mustEndGroup(SU.getInstr()))
1222  dbgs() << "true;";
1223  else
1224  dbgs() << "false;";
1225  dbgs() << '\n';
1226  }
1227  if (ExitSU.getInstr() != nullptr)
1228  ExitSU.dumpAll(this);
1229  );
1230  if (ViewMISchedDAGs) viewGraph();
1231 
1232  // Initialize ready queues now that the DAG and priority data are finalized.
1233  initQueues(TopRoots, BotRoots);
1234 
1235  bool IsTopNode = false;
1236  while (true) {
1237  DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1238  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1239  if (!SU) break;
1240 
1241  assert(!SU->isScheduled && "Node already scheduled");
1242  if (!checkSchedLimit())
1243  break;
1244 
1245  scheduleMI(SU, IsTopNode);
1246 
1247  if (DFSResult) {
1248  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1249  if (!ScheduledTrees.test(SubtreeID)) {
1250  ScheduledTrees.set(SubtreeID);
1251  DFSResult->scheduleTree(SubtreeID);
1252  SchedImpl->scheduleTree(SubtreeID);
1253  }
1254  }
1255 
1256  // Notify the scheduling strategy after updating the DAG.
1257  SchedImpl->schedNode(SU, IsTopNode);
1258 
1259  updateQueues(SU, IsTopNode);
1260  }
1261  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1262 
1263  placeDebugValues();
1264 
1265  DEBUG({
1266  dbgs() << "*** Final schedule for "
1267  << printMBBReference(*begin()->getParent()) << " ***\n";
1268  dumpSchedule();
1269  dbgs() << '\n';
1270  });
1271 }
1272 
1273 /// Build the DAG and setup three register pressure trackers.
1274 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1275  if (!ShouldTrackPressure) {
1276  RPTracker.reset();
1277  RegionCriticalPSets.clear();
1278  buildSchedGraph(AA);
1279  return;
1280  }
1281 
1282  // Initialize the register pressure tracker used by buildSchedGraph.
1283  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1284  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1285 
1286  // Account for liveness generated by the region boundary.
1287  if (LiveRegionEnd != RegionEnd)
1288  RPTracker.recede();
1289 
1290  // Build the DAG, and compute current register pressure.
1291  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1292 
1293  // Initialize top/bottom trackers after computing region pressure.
1294  initRegPressure();
1295 }
1296 
1297 void ScheduleDAGMILive::computeDFSResult() {
1298  if (!DFSResult)
1299  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1300  DFSResult->clear();
1301  ScheduledTrees.clear();
1302  DFSResult->resize(SUnits.size());
1303  DFSResult->compute(SUnits);
1304  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1305 }
1306 
1307 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1308 /// only provides the critical path for single block loops. To handle loops that
1309 /// span blocks, we could use the vreg path latencies provided by
1310 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1311 /// available for use in the scheduler.
1312 ///
1313 /// The cyclic path estimation identifies a def-use pair that crosses the back
1314 /// edge and considers the depth and height of the nodes. For example, consider
1315 /// the following instruction sequence where each instruction has unit latency
1316 /// and defines an eponymous virtual register:
1317 ///
1318 /// a->b(a,c)->c(b)->d(c)->exit
1319 ///
1320 /// The cyclic critical path is two cycles: b->c->b
1321 /// The acyclic critical path is four cycles: a->b->c->d->exit
1322 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1323 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1324 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1325 /// LiveInDepth = depth(b) = len(a->b) = 1
1326 ///
1327 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1328 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1329 /// CyclicCriticalPath = min(2, 2) = 2
1330 ///
1331 /// This could be relevant to PostRA scheduling, but is currently implemented
1332 /// assuming LiveIntervals.
1333 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1334  // This only applies to single block loops.
1335  if (!BB->isSuccessor(BB))
1336  return 0;
1337 
1338  unsigned MaxCyclicLatency = 0;
1339  // Visit each live out vreg def to find def/use pairs that cross iterations.
1340  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1341  unsigned Reg = P.RegUnit;
1342  if (!TRI->isVirtualRegister(Reg))
1343  continue;
1344  const LiveInterval &LI = LIS->getInterval(Reg);
1345  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1346  if (!DefVNI)
1347  continue;
1348 
1349  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1350  const SUnit *DefSU = getSUnit(DefMI);
1351  if (!DefSU)
1352  continue;
1353 
1354  unsigned LiveOutHeight = DefSU->getHeight();
1355  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1356  // Visit all local users of the vreg def.
1357  for (const VReg2SUnit &V2SU
1358  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1359  SUnit *SU = V2SU.SU;
1360  if (SU == &ExitSU)
1361  continue;
1362 
1363  // Only consider uses of the phi.
1364  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1365  if (!LRQ.valueIn()->isPHIDef())
1366  continue;
1367 
1368  // Assume that a path spanning two iterations is a cycle, which could
1369  // overestimate in strange cases. This allows cyclic latency to be
1370  // estimated as the minimum slack of the vreg's depth or height.
1371  unsigned CyclicLatency = 0;
1372  if (LiveOutDepth > SU->getDepth())
1373  CyclicLatency = LiveOutDepth - SU->getDepth();
1374 
1375  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1376  if (LiveInHeight > LiveOutHeight) {
1377  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1378  CyclicLatency = LiveInHeight - LiveOutHeight;
1379  } else
1380  CyclicLatency = 0;
1381 
1382  DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1383  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1384  if (CyclicLatency > MaxCyclicLatency)
1385  MaxCyclicLatency = CyclicLatency;
1386  }
1387  }
1388  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1389  return MaxCyclicLatency;
1390 }
1391 
1392 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1393 /// the Top RP tracker in case the region beginning has changed.
1394 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1395  ArrayRef<SUnit*> BotRoots) {
1396  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1397  if (ShouldTrackPressure) {
1398  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1399  TopRPTracker.setPos(CurrentTop);
1400  }
1401 }
1402 
1403 /// Move an instruction and update register pressure.
1404 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1405  // Move the instruction to its new location in the instruction stream.
1406  MachineInstr *MI = SU->getInstr();
1407 
1408  if (IsTopNode) {
1409  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1410  if (&*CurrentTop == MI)
1411  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1412  else {
1413  moveInstruction(MI, CurrentTop);
1414  TopRPTracker.setPos(MI);
1415  }
1416 
1417  if (ShouldTrackPressure) {
1418  // Update top scheduled pressure.
1419  RegisterOperands RegOpers;
1420  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1421  if (ShouldTrackLaneMasks) {
1422  // Adjust liveness and add missing dead+read-undef flags.
1423  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1424  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1425  } else {
1426  // Adjust for missing dead-def flags.
1427  RegOpers.detectDeadDefs(*MI, *LIS);
1428  }
1429 
1430  TopRPTracker.advance(RegOpers);
1431  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1432  DEBUG(
1433  dbgs() << "Top Pressure:\n";
1434  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1435  );
1436 
1437  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1438  }
1439  } else {
1440  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1441  MachineBasicBlock::iterator priorII =
1442  priorNonDebug(CurrentBottom, CurrentTop);
1443  if (&*priorII == MI)
1444  CurrentBottom = priorII;
1445  else {
1446  if (&*CurrentTop == MI) {
1447  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1448  TopRPTracker.setPos(CurrentTop);
1449  }
1450  moveInstruction(MI, CurrentBottom);
1451  CurrentBottom = MI;
1452  BotRPTracker.setPos(CurrentBottom);
1453  }
1454  if (ShouldTrackPressure) {
1455  RegisterOperands RegOpers;
1456  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1457  if (ShouldTrackLaneMasks) {
1458  // Adjust liveness and add missing dead+read-undef flags.
1459  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1460  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1461  } else {
1462  // Adjust for missing dead-def flags.
1463  RegOpers.detectDeadDefs(*MI, *LIS);
1464  }
1465 
1466  if (BotRPTracker.getPos() != CurrentBottom)
1467  BotRPTracker.recedeSkipDebugValues();
1468  SmallVector<RegisterMaskPair, 8> LiveUses;
1469  BotRPTracker.recede(RegOpers, &LiveUses);
1470  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1471  DEBUG(
1472  dbgs() << "Bottom Pressure:\n";
1473  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1474  );
1475 
1476  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1477  updatePressureDiffs(LiveUses);
1478  }
1479  }
1480 }
1481 
1482 //===----------------------------------------------------------------------===//
1483 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1484 //===----------------------------------------------------------------------===//
1485 
1486 namespace {
1487 
1488 /// \brief Post-process the DAG to create cluster edges between neighboring
1489 /// loads or between neighboring stores.
1490 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1491  struct MemOpInfo {
1492  SUnit *SU;
1493  unsigned BaseReg;
1494  int64_t Offset;
1495 
1496  MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1497  : SU(su), BaseReg(reg), Offset(ofs) {}
1498 
1499  bool operator<(const MemOpInfo&RHS) const {
1500  return std::tie(BaseReg, Offset, SU->NodeNum) <
1501  std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1502  }
1503  };
1504 
1505  const TargetInstrInfo *TII;
1506  const TargetRegisterInfo *TRI;
1507  bool IsLoad;
1508 
1509 public:
1510  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1511  const TargetRegisterInfo *tri, bool IsLoad)
1512  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1513 
1514  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1515 
1516 protected:
1517  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1518 };
1519 
1520 class StoreClusterMutation : public BaseMemOpClusterMutation {
1521 public:
1522  StoreClusterMutation(const TargetInstrInfo *tii,
1523  const TargetRegisterInfo *tri)
1524  : BaseMemOpClusterMutation(tii, tri, false) {}
1525 };
1526 
1527 class LoadClusterMutation : public BaseMemOpClusterMutation {
1528 public:
1529  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1530  : BaseMemOpClusterMutation(tii, tri, true) {}
1531 };
1532 
1533 } // end anonymous namespace
1534 
1535 namespace llvm {
1536 
1537 std::unique_ptr<ScheduleDAGMutation>
1538 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1539  const TargetRegisterInfo *TRI) {
1540  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1541  : nullptr;
1542 }
1543 
1544 std::unique_ptr<ScheduleDAGMutation>
1545 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1546  const TargetRegisterInfo *TRI) {
1547  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1548  : nullptr;
1549 }
1550 
1551 } // end namespace llvm
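// Illustration (not from this file): targets opt in to clustering by attaching
// these factories when they construct their scheduler, typically from an
// overridden createMachineScheduler/createPostMachineScheduler hook:
//
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//
// Passing -misched-cluster=false makes both factories return nullptr, and
// addMutation skips null mutations, so the flag turns clustering off for
// targets that go through these helpers.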
1552 
1553 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1554  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1555  SmallVector<MemOpInfo, 32> MemOpRecords;
1556  for (SUnit *SU : MemOps) {
1557  unsigned BaseReg;
1558  int64_t Offset;
1559  if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1560  MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1561  }
1562  if (MemOpRecords.size() < 2)
1563  return;
1564 
1565  std::sort(MemOpRecords.begin(), MemOpRecords.end());
1566  unsigned ClusterLength = 1;
1567  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1568  SUnit *SUa = MemOpRecords[Idx].SU;
1569  SUnit *SUb = MemOpRecords[Idx+1].SU;
1570  if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1571  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1572  ClusterLength) &&
1573  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1574  DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1575  << SUb->NodeNum << ")\n");
1576  // Copy successor edges from SUa to SUb. Interleaving computation
1577  // dependent on SUa can prevent load combining due to register reuse.
1578  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1579  // loads should have effectively the same inputs.
1580  for (const SDep &Succ : SUa->Succs) {
1581  if (Succ.getSUnit() == SUb)
1582  continue;
1583  DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
1584  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1585  }
1586  ++ClusterLength;
1587  } else
1588  ClusterLength = 1;
1589  }
1590 }
1591 
1592 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1593 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1594  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1595 
1596  // Map DAG NodeNum to store chain ID.
1597  DenseMap<unsigned, unsigned> StoreChainIDs;
1598  // Map each store chain to a set of dependent MemOps.
1599  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1600  for (SUnit &SU : DAG->SUnits) {
1601  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1602  (!IsLoad && !SU.getInstr()->mayStore()))
1603  continue;
1604 
1605  unsigned ChainPredID = DAG->SUnits.size();
1606  for (const SDep &Pred : SU.Preds) {
1607  if (Pred.isCtrl()) {
1608  ChainPredID = Pred.getSUnit()->NodeNum;
1609  break;
1610  }
1611  }
1612  // Check if this chain-like pred has been seen
1613  // before. ChainPredID==MaxNodeID at the top of the schedule.
1614  unsigned NumChains = StoreChainDependents.size();
1615  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1616  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1617  if (Result.second)
1618  StoreChainDependents.resize(NumChains + 1);
1619  StoreChainDependents[Result.first->second].push_back(&SU);
1620  }
1621 
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChainDependents)
1624  clusterNeighboringMemOps(SCD, DAG);
1625 }
1626 
1627 //===----------------------------------------------------------------------===//
1628 // CopyConstrain - DAG post-processing to encourage copy elimination.
1629 //===----------------------------------------------------------------------===//
1630 
1631 namespace {
1632 
1633 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1634 /// the one use that defines the copy's source vreg, most likely an induction
1635 /// variable increment.
1636 class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639 
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643 
1644 public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646 
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648 
1649 protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651 };
1652 
1653 } // end anonymous namespace
1654 
1655 namespace llvm {
1656 
1657 std::unique_ptr<ScheduleDAGMutation>
1658 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659  const TargetRegisterInfo *TRI) {
1660  return llvm::make_unique<CopyConstrain>(TII, TRI);
1661 }
1662 
1663 } // end namespace llvm
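// Illustration (not from this file): the generic pre-RA scheduler factory
// registers this mutation by default, roughly as:
//
//   ScheduleDAGMILive *DAG =
//       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
//   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
//
// so any target that goes through createGenericSchedLive gets the weak
// copy-constraining edges described below without further setup.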
1664 
1665 /// constrainLocalCopy handles two possibilities:
1666 /// 1) Local src:
1667 /// I0: = dst
1668 /// I1: src = ...
1669 /// I2: = dst
1670 /// I3: dst = src (copy)
1671 /// (create pred->succ edges I0->I1, I2->I1)
1672 ///
1673 /// 2) Local copy:
1674 /// I0: dst = src (copy)
1675 /// I1: = dst
1676 /// I2: src = ...
1677 /// I3: = dst
1678 /// (create pred->succ edges I1->I2, I3->I2)
1679 ///
1680 /// Although the MachineScheduler is currently constrained to single blocks,
1681 /// this algorithm should handle extended blocks. An EBB is a set of
1682 /// contiguously numbered blocks such that the previous block in the EBB is
1683 /// always the single predecessor.
1684 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687 
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  unsigned SrcReg = SrcOp.getReg();
1691  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692  return;
1693 
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  unsigned DstReg = DstOp.getReg();
1696  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1697  return;
1698 
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709  LocalReg = DstReg;
1710  GlobalReg = SrcReg;
1711  LocalLI = &LIS->getInterval(LocalReg);
1712  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713  return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716 
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with it.
1723  if (GlobalSegment == GlobalLI->end())
1724  return;
1725 
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731  ++GlobalSegment;
1732 
1733  if (GlobalSegment == GlobalLI->end())
1734  return;
1735 
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738  // Two address defs have no hole.
1739  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740  GlobalSegment->start)) {
1741  return;
1742  }
1743  // If the prior global segment may be defined by the same two-address
1744  // instruction that also defines LocalLI, then we can't make a hole here.
1745  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746  LocalLI->beginIndex())) {
1747  return;
1748  }
1749  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750  // it would be a disconnected component in the live range.
1751  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752  "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756  return;
1757 
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760  return;
1761 
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770  continue;
1771  if (Succ.getSUnit() == GlobalSU)
1772  continue;
1773  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774  return;
1775  LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785  continue;
1786  if (Pred.getSUnit() == FirstLocalSU)
1787  continue;
1788  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789  return;
1790  GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796  DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1797  << GlobalSU->NodeNum << ")\n");
1798  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802  DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1803  << FirstLocalSU->NodeNum << ")\n");
1804  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806 }
1807 
1808 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1809 /// copy elimination.
1810 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813 
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816  return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819  *priorNonDebug(DAG->end(), DAG->begin()));
1820 
1821  for (SUnit &SU : DAG->SUnits) {
1822  if (!SU.getInstr()->isCopy())
1823  continue;
1824 
1825  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831 // and possibly other custom schedulers.
1832 //===----------------------------------------------------------------------===//
1833 
1834 static const unsigned InvalidCycle = ~0U;
1835 
1836 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837 
1838 /// Given a Count of resource usage and a Latency value, return true if a
1839 /// SchedBoundary becomes resource limited.
1840 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1841  unsigned Latency) {
1842  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1843 }
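// Illustrative example (editorial, hypothetical numbers): with LFactor = 2,
// Count = 10 scaled resource units and Latency = 3 cycles,
// Count - Latency * LFactor = 10 - 6 = 4 > 2, so the boundary would be
// classified as resource limited rather than latency limited.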
1844 
1845 void SchedBoundary::reset() {
1846  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1847  // Destroying and reconstructing it is very expensive though. So keep
1848  // invalid, placeholder HazardRecs.
1849  if (HazardRec && HazardRec->isEnabled()) {
1850  delete HazardRec;
1851  HazardRec = nullptr;
1852  }
1853  Available.clear();
1854  Pending.clear();
1855  CheckPending = false;
1856  CurrCycle = 0;
1857  CurrMOps = 0;
1858  MinReadyCycle = std::numeric_limits<unsigned>::max();
1859  ExpectedLatency = 0;
1860  DependentLatency = 0;
1861  RetiredMOps = 0;
1862  MaxExecutedResCount = 0;
1863  ZoneCritResIdx = 0;
1864  IsResourceLimited = false;
1865  ReservedCycles.clear();
1866 #ifndef NDEBUG
1867  // Track the maximum number of stall cycles that could arise either from the
1868  // latency of a DAG edge or the number of cycles that a processor resource is
1869  // reserved (SchedBoundary::ReservedCycles).
1870  MaxObservedStall = 0;
1871 #endif
1872  // Reserve a zero-count for invalid CritResIdx.
1873  ExecutedResCounts.resize(1);
1874  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1875 }
1876 
1877 void SchedRemainder::
1878 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1879  reset();
1880  if (!SchedModel->hasInstrSchedModel())
1881  return;
1882  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1883  for (SUnit &SU : DAG->SUnits) {
1884  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1885  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1886  * SchedModel->getMicroOpFactor();
1887  for (TargetSchedModel::ProcResIter
1888  PI = SchedModel->getWriteProcResBegin(SC),
1889  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1890  unsigned PIdx = PI->ProcResourceIdx;
1891  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1892  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1893  }
1894  }
1895 }
1896 
1897 void SchedBoundary::
1898 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1899  reset();
1900  DAG = dag;
1901  SchedModel = smodel;
1902  Rem = rem;
1903  if (SchedModel->hasInstrSchedModel()) {
1904  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1905  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1906  }
1907 }
1908 
1909 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1910 /// these "soft stalls" differently than the hard stall cycles based on CPU
1911 /// resources and computed by checkHazard(). A fully in-order model
1912 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1913 /// available for scheduling until they are ready. However, a weaker in-order
1914 /// model may use this for heuristics. For example, if a processor has in-order
1915 /// behavior when reading certain resources, this may come into play.
1916 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1917  if (!SU->isUnbuffered)
1918  return 0;
1919 
1920  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1921  if (ReadyCycle > CurrCycle)
1922  return ReadyCycle - CurrCycle;
1923  return 0;
1924 }
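// Editorial example (hypothetical values): an unbuffered SU that becomes
// ready at cycle 7 while the zone sits at CurrCycle 5 reports 2 soft-stall
// cycles here; any buffered SU reports 0.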
1925 
1926 /// Compute the next cycle at which the given processor resource can be
1927 /// scheduled.
1928 unsigned SchedBoundary::
1929 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1930  unsigned NextUnreserved = ReservedCycles[PIdx];
1931  // If this resource has never been used, always return cycle zero.
1932  if (NextUnreserved == InvalidCycle)
1933  return 0;
1934  // For bottom-up scheduling add the cycles needed for the current operation.
1935  if (!isTop())
1936  NextUnreserved += Cycles;
1937  return NextUnreserved;
1938 }
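// Editorial example (hypothetical values): if ReservedCycles[PIdx] == 7 and
// the current operation needs Cycles == 2, a bottom-up zone reports 7 + 2 = 9
// while a top-down zone reports 7; a never-used resource always reports 0.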
1939 
1940 /// Does this SU have a hazard within the current instruction group.
1941 ///
1942 /// The scheduler supports two modes of hazard recognition. The first is the
1943 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1944 /// supports highly complicated in-order reservation tables
1945  /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1946 ///
1947 /// The second is a streamlined mechanism that checks for hazards based on
1948 /// simple counters that the scheduler itself maintains. It explicitly checks
1949 /// for instruction dispatch limitations, including the number of micro-ops that
1950 /// can dispatch per cycle.
1951 ///
1952 /// TODO: Also check whether the SU must start a new group.
1953 bool SchedBoundary::checkHazard(SUnit *SU) {
1954  if (HazardRec->isEnabled()
1955  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1956  return true;
1957  }
1958 
1959  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1960  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1961  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1962  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1963  return true;
1964  }
1965 
1966  if (CurrMOps > 0 &&
1967  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1968  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1969  DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1970  << (isTop()? "begin" : "end") << " group\n");
1971  return true;
1972  }
1973 
1974  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1975  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1976  for (const MCWriteProcResEntry &PE :
1977  make_range(SchedModel->getWriteProcResBegin(SC),
1978  SchedModel->getWriteProcResEnd(SC))) {
1979  unsigned ResIdx = PE.ProcResourceIdx;
1980  unsigned Cycles = PE.Cycles;
1981  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1982  if (NRCycle > CurrCycle) {
1983 #ifndef NDEBUG
1984  MaxObservedStall = std::max(Cycles, MaxObservedStall);
1985 #endif
1986  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1987  << SchedModel->getResourceName(ResIdx)
1988  << "=" << NRCycle << "c\n");
1989  return true;
1990  }
1991  }
1992  }
1993  return false;
1994 }
1995 
1996 // Find the unscheduled node in ReadySUs with the highest latency.
1997 unsigned SchedBoundary::
1998 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1999  SUnit *LateSU = nullptr;
2000  unsigned RemLatency = 0;
2001  for (SUnit *SU : ReadySUs) {
2002  unsigned L = getUnscheduledLatency(SU);
2003  if (L > RemLatency) {
2004  RemLatency = L;
2005  LateSU = SU;
2006  }
2007  }
2008  if (LateSU) {
2009  DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2010  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2011  }
2012  return RemLatency;
2013 }
2014 
2015 // Count resources in this zone and the remaining unscheduled
2016 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2017 // resource index, or zero if the zone is issue limited.
2018 unsigned SchedBoundary::
2019 getOtherResourceCount(unsigned &OtherCritIdx) {
2020  OtherCritIdx = 0;
2021  if (!SchedModel->hasInstrSchedModel())
2022  return 0;
2023 
2024  unsigned OtherCritCount = Rem->RemIssueCount
2025  + (RetiredMOps * SchedModel->getMicroOpFactor());
2026  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2027  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2028  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2029  PIdx != PEnd; ++PIdx) {
2030  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2031  if (OtherCount > OtherCritCount) {
2032  OtherCritCount = OtherCount;
2033  OtherCritIdx = PIdx;
2034  }
2035  }
2036  if (OtherCritIdx) {
2037  DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
2038  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2039  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2040  }
2041  return OtherCritCount;
2042 }
2043 
2044 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2045  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2046 
2047 #ifndef NDEBUG
2048  // ReadyCycle has been bumped up to CurrCycle when this node was
2049  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2050  // scheduling, so may now be greater than ReadyCycle.
2051  if (ReadyCycle > CurrCycle)
2052  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2053 #endif
2054 
2055  if (ReadyCycle < MinReadyCycle)
2056  MinReadyCycle = ReadyCycle;
2057 
2058  // Check for interlocks first. For the purpose of other heuristics, an
2059  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2060  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2061  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2062  Available.size() >= ReadyListLimit)
2063  Pending.push(SU);
2064  else
2065  Available.push(SU);
2066 }
2067 
2068 /// Move the boundary of scheduled code by one cycle.
2069 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2070  if (SchedModel->getMicroOpBufferSize() == 0) {
2071  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2072  "MinReadyCycle uninitialized");
2073  if (MinReadyCycle > NextCycle)
2074  NextCycle = MinReadyCycle;
2075  }
2076  // Update the current micro-ops, which will issue in the next cycle.
2077  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2078  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2079 
2080  // Decrement DependentLatency based on the next cycle.
2081  if ((NextCycle - CurrCycle) > DependentLatency)
2082  DependentLatency = 0;
2083  else
2084  DependentLatency -= (NextCycle - CurrCycle);
2085 
2086  if (!HazardRec->isEnabled()) {
2087  // Bypass HazardRec virtual calls.
2088  CurrCycle = NextCycle;
2089  } else {
2090  // Bypass getHazardType calls in case of long latency.
2091  for (; CurrCycle != NextCycle; ++CurrCycle) {
2092  if (isTop())
2093  HazardRec->AdvanceCycle();
2094  else
2095  HazardRec->RecedeCycle();
2096  }
2097  }
2098  CheckPending = true;
2099  IsResourceLimited =
2100  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2101  getScheduledLatency());
2102 
2103  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2104 }
2105 
2106 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2107  ExecutedResCounts[PIdx] += Count;
2108  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2109  MaxExecutedResCount = ExecutedResCounts[PIdx];
2110 }
2111 
2112 /// Add the given processor resource to this scheduled zone.
2113 ///
2114 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2115 /// during which this resource is consumed.
2116 ///
2117 /// \return the next cycle at which the instruction may execute without
2118 /// oversubscribing resources.
2119 unsigned SchedBoundary::
2120 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2121  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2122  unsigned Count = Factor * Cycles;
2123  DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
2124  << " +" << Cycles << "x" << Factor << "u\n");
2125 
2126  // Update Executed resources counts.
2127  incExecutedResources(PIdx, Count);
2128  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2129  Rem->RemainingCounts[PIdx] -= Count;
2130 
2131  // Check if this resource exceeds the current critical resource. If so, it
2132  // becomes the critical resource.
2133  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2134  ZoneCritResIdx = PIdx;
2135  DEBUG(dbgs() << " *** Critical resource "
2136  << SchedModel->getResourceName(PIdx) << ": "
2137  << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2138  }
2139  // For reserved resources, record the highest cycle using the resource.
2140  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2141  if (NextAvailable > CurrCycle) {
2142  DEBUG(dbgs() << " Resource conflict: "
2143  << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2144  << NextAvailable << "\n");
2145  }
2146  return NextAvailable;
2147 }
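// Worked example (editorial, hypothetical machine model): for a resource with
// getResourceFactor(PIdx) == 3 consumed for Cycles == 2, Count = 3 * 2 = 6
// scaled units are added to ExecutedResCounts[PIdx] and subtracted from
// Rem->RemainingCounts[PIdx] before the critical-resource check runs.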
2148 
2149 /// Move the boundary of scheduled code by one SUnit.
2150 void SchedBoundary::bumpNode(SUnit *SU) {
2151  // Update the reservation table.
2152  if (HazardRec->isEnabled()) {
2153  if (!isTop() && SU->isCall) {
2154  // Calls are scheduled with their preceding instructions. For bottom-up
2155  // scheduling, clear the pipeline state before emitting.
2156  HazardRec->Reset();
2157  }
2158  HazardRec->EmitInstruction(SU);
2159  }
2160  // checkHazard should prevent scheduling multiple instructions per cycle that
2161  // exceed the issue width.
2162  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2163  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2164  assert(
2165  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2166  "Cannot schedule this instruction's MicroOps in the current cycle.");
2167 
2168  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2169  DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2170 
2171  unsigned NextCycle = CurrCycle;
2172  switch (SchedModel->getMicroOpBufferSize()) {
2173  case 0:
2174  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2175  break;
2176  case 1:
2177  if (ReadyCycle > NextCycle) {
2178  NextCycle = ReadyCycle;
2179  DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2180  }
2181  break;
2182  default:
2183  // We don't currently model the OOO reorder buffer, so consider all
2184  // scheduled MOps to be "retired". We do loosely model in-order resource
2185  // latency. If this instruction uses an in-order resource, account for any
2186  // likely stall cycles.
2187  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2188  NextCycle = ReadyCycle;
2189  break;
2190  }
2191  RetiredMOps += IncMOps;
2192 
2193  // Update resource counts and critical resource.
2194  if (SchedModel->hasInstrSchedModel()) {
2195  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2196  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2197  Rem->RemIssueCount -= DecRemIssue;
2198  if (ZoneCritResIdx) {
2199  // Scale scheduled micro-ops for comparing with the critical resource.
2200  unsigned ScaledMOps =
2201  RetiredMOps * SchedModel->getMicroOpFactor();
2202 
2203  // If scaled micro-ops are now more than the previous critical resource by
2204  // a full cycle, then micro-ops issue becomes critical.
2205  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2206  >= (int)SchedModel->getLatencyFactor()) {
2207  ZoneCritResIdx = 0;
2208  DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2209  << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2210  }
2211  }
2212  for (TargetSchedModel::ProcResIter
2213  PI = SchedModel->getWriteProcResBegin(SC),
2214  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2215  unsigned RCycle =
2216  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2217  if (RCycle > NextCycle)
2218  NextCycle = RCycle;
2219  }
2220  if (SU->hasReservedResource) {
2221  // For reserved resources, record the highest cycle using the resource.
2222  // For top-down scheduling, this is the cycle in which we schedule this
2223  // instruction plus the number of cycles the operation reserves the
2224  // resource. For bottom-up, it is simply the instruction's cycle.
2225  for (TargetSchedModel::ProcResIter
2226  PI = SchedModel->getWriteProcResBegin(SC),
2227  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2228  unsigned PIdx = PI->ProcResourceIdx;
2229  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2230  if (isTop()) {
2231  ReservedCycles[PIdx] =
2232  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2233  }
2234  else
2235  ReservedCycles[PIdx] = NextCycle;
2236  }
2237  }
2238  }
2239  }
2240  // Update ExpectedLatency and DependentLatency.
2241  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2242  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2243  if (SU->getDepth() > TopLatency) {
2244  TopLatency = SU->getDepth();
2245  DEBUG(dbgs() << " " << Available.getName()
2246  << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2247  }
2248  if (SU->getHeight() > BotLatency) {
2249  BotLatency = SU->getHeight();
2250  DEBUG(dbgs() << " " << Available.getName()
2251  << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2252  }
2253  // If we stall for any reason, bump the cycle.
2254  if (NextCycle > CurrCycle)
2255  bumpCycle(NextCycle);
2256  else
2257  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2258  // resource limited. If a stall occurred, bumpCycle does this.
2259  IsResourceLimited =
2260  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2261  getScheduledLatency());
2262 
2263  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2264  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2265  // one cycle. Since we commonly reach the max MOps here, opportunistically
2266  // bump the cycle to avoid uselessly checking everything in the readyQ.
2267  CurrMOps += IncMOps;
2268 
2269  // Bump the cycle count for issue group constraints.
2270  // This must be done after NextCycle has been adjusted for all other stalls.
2271  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2272  // CurrCycle to X.
2273  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2274  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2275  DEBUG(dbgs() << " Bump cycle to "
2276  << (isTop() ? "end" : "begin") << " group\n");
2277  bumpCycle(++NextCycle);
2278  }
2279 
2280  while (CurrMOps >= SchedModel->getIssueWidth()) {
2281  DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2282  << " at cycle " << CurrCycle << '\n');
2283  bumpCycle(++NextCycle);
2284  }
2285  DEBUG(dumpScheduledState());
2286 }
2287 
2288 /// Release pending ready nodes into the available queue. This makes them
2289 /// visible to heuristics.
2290 void SchedBoundary::releasePending() {
2291  // If the available queue is empty, it is safe to reset MinReadyCycle.
2292  if (Available.empty())
2293  MinReadyCycle = std::numeric_limits<unsigned>::max();
2294 
2295  // Check to see if any of the pending instructions are ready to issue. If
2296  // so, add them to the available queue.
2297  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2298  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2299  SUnit *SU = *(Pending.begin()+i);
2300  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2301 
2302  if (ReadyCycle < MinReadyCycle)
2303  MinReadyCycle = ReadyCycle;
2304 
2305  if (!IsBuffered && ReadyCycle > CurrCycle)
2306  continue;
2307 
2308  if (checkHazard(SU))
2309  continue;
2310 
2311  if (Available.size() >= ReadyListLimit)
2312  break;
2313 
2314  Available.push(SU);
2315  Pending.remove(Pending.begin()+i);
2316  --i; --e;
2317  }
2318  CheckPending = false;
2319 }
2320 
2321 /// Remove SU from the ready set for this boundary.
2322 void SchedBoundary::removeReady(SUnit *SU) {
2323  if (Available.isInQueue(SU))
2324  Available.remove(Available.find(SU));
2325  else {
2326  assert(Pending.isInQueue(SU) && "bad ready count");
2327  Pending.remove(Pending.find(SU));
2328  }
2329 }
2330 
2331 /// If this queue only has one ready candidate, return it. As a side effect,
2332 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2333 /// one node is ready. If multiple instructions are ready, return NULL.
2334 SUnit *SchedBoundary::pickOnlyChoice() {
2335  if (CheckPending)
2336  releasePending();
2337 
2338  if (CurrMOps > 0) {
2339  // Defer any ready instrs that now have a hazard.
2340  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2341  if (checkHazard(*I)) {
2342  Pending.push(*I);
2343  I = Available.remove(I);
2344  continue;
2345  }
2346  ++I;
2347  }
2348  }
2349  for (unsigned i = 0; Available.empty(); ++i) {
2350 // FIXME: Re-enable assert once PR20057 is resolved.
2351 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2352 // "permanent hazard");
2353  (void)i;
2354  bumpCycle(CurrCycle + 1);
2355  releasePending();
2356  }
2357 
2358  DEBUG(Pending.dump());
2359  DEBUG(Available.dump());
2360 
2361  if (Available.size() == 1)
2362  return *Available.begin();
2363  return nullptr;
2364 }
2365 
2366 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2367 // This is useful information to dump after bumpNode.
2368 // Note that the Queue contents are more useful before pickNodeFromQueue.
2369 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2370  unsigned ResFactor;
2371  unsigned ResCount;
2372  if (ZoneCritResIdx) {
2373  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2374  ResCount = getResourceCount(ZoneCritResIdx);
2375  } else {
2376  ResFactor = SchedModel->getMicroOpFactor();
2377  ResCount = RetiredMOps * ResFactor;
2378  }
2379  unsigned LFactor = SchedModel->getLatencyFactor();
2380  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2381  << " Retired: " << RetiredMOps;
2382  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2383  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2384  << ResCount / ResFactor << " "
2385  << SchedModel->getResourceName(ZoneCritResIdx)
2386  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2387  << (IsResourceLimited ? " - Resource" : " - Latency")
2388  << " limited.\n";
2389 }
2390 #endif
2391 
2392 //===----------------------------------------------------------------------===//
2393 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2394 //===----------------------------------------------------------------------===//
2395 
2396 void GenericSchedulerBase::SchedCandidate::
2397 initResourceDelta(const ScheduleDAGMI *DAG,
2398  const TargetSchedModel *SchedModel) {
2399  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2400  return;
2401 
2402  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2403  for (TargetSchedModel::ProcResIter
2404  PI = SchedModel->getWriteProcResBegin(SC),
2405  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2406  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2407  ResDelta.CritResources += PI->Cycles;
2408  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2409  ResDelta.DemandedResources += PI->Cycles;
2410  }
2411 }
2412 
2413 /// Set the CandPolicy given a scheduling zone given the current resources and
2414 /// latencies inside and outside the zone.
2415 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2416  SchedBoundary &CurrZone,
2417  SchedBoundary *OtherZone) {
2418  // Apply preemptive heuristics based on the total latency and resources
2419  // inside and outside this zone. Potential stalls should be considered before
2420  // following this policy.
2421 
2422  // Compute remaining latency. We need this both to determine whether the
2423  // overall schedule has become latency-limited and whether the instructions
2424  // outside this zone are resource or latency limited.
2425  //
2426  // The "dependent" latency is updated incrementally during scheduling as the
2427  // max height/depth of scheduled nodes minus the cycles since it was
2428  // scheduled:
2429  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2430  //
2431  // The "independent" latency is the max ready queue depth:
2432  // ILat = max N.depth for N in Available|Pending
2433  //
2434  // RemainingLatency is the greater of independent and dependent latency.
2435  unsigned RemLatency = CurrZone.getDependentLatency();
2436  RemLatency = std::max(RemLatency,
2437  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2438  RemLatency = std::max(RemLatency,
2439  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2440 
2441  // Compute the critical resource outside the zone.
2442  unsigned OtherCritIdx = 0;
2443  unsigned OtherCount =
2444  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2445 
2446  bool OtherResLimited = false;
2447  if (SchedModel->hasInstrSchedModel())
2448  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2449  OtherCount, RemLatency);
2450 
2451  // Schedule aggressively for latency in PostRA mode. We don't check for
2452  // acyclic latency during PostRA, and highly out-of-order processors will
2453  // skip PostRA scheduling.
2454  if (!OtherResLimited) {
2455  if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2456  Policy.ReduceLatency |= true;
2457  DEBUG(dbgs() << " " << CurrZone.Available.getName()
2458  << " RemainingLatency " << RemLatency << " + "
2459  << CurrZone.getCurrCycle() << "c > CritPath "
2460  << Rem.CriticalPath << "\n");
2461  }
2462  }
2463  // If the same resource is limiting inside and outside the zone, do nothing.
2464  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2465  return;
2466 
2467  DEBUG(
2468  if (CurrZone.isResourceLimited()) {
2469  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2470  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2471  << "\n";
2472  }
2473  if (OtherResLimited)
2474  dbgs() << " RemainingLimit: "
2475  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2476  if (!CurrZone.isResourceLimited() && !OtherResLimited)
2477  dbgs() << " Latency limited both directions.\n");
2478 
2479  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2480  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2481 
2482  if (OtherResLimited)
2483  Policy.DemandResIdx = OtherCritIdx;
2484 }
2485 
2486 #ifndef NDEBUG
2487 const char *GenericSchedulerBase::getReasonStr(
2488  GenericSchedulerBase::CandReason Reason) {
2489  switch (Reason) {
2490  case NoCand: return "NOCAND ";
2491  case Only1: return "ONLY1 ";
2492  case PhysRegCopy: return "PREG-COPY ";
2493  case RegExcess: return "REG-EXCESS";
2494  case RegCritical: return "REG-CRIT ";
2495  case Stall: return "STALL ";
2496  case Cluster: return "CLUSTER ";
2497  case Weak: return "WEAK ";
2498  case RegMax: return "REG-MAX ";
2499  case ResourceReduce: return "RES-REDUCE";
2500  case ResourceDemand: return "RES-DEMAND";
2501  case TopDepthReduce: return "TOP-DEPTH ";
2502  case TopPathReduce: return "TOP-PATH ";
2503  case BotHeightReduce:return "BOT-HEIGHT";
2504  case BotPathReduce: return "BOT-PATH ";
2505  case NextDefUse: return "DEF-USE ";
2506  case NodeOrder: return "ORDER ";
2507  };
2508  llvm_unreachable("Unknown reason!");
2509 }
2510 
2511 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2512  PressureChange P;
2513  unsigned ResIdx = 0;
2514  unsigned Latency = 0;
2515  switch (Cand.Reason) {
2516  default:
2517  break;
2518  case RegExcess:
2519  P = Cand.RPDelta.Excess;
2520  break;
2521  case RegCritical:
2522  P = Cand.RPDelta.CriticalMax;
2523  break;
2524  case RegMax:
2525  P = Cand.RPDelta.CurrentMax;
2526  break;
2527  case ResourceReduce:
2528  ResIdx = Cand.Policy.ReduceResIdx;
2529  break;
2530  case ResourceDemand:
2531  ResIdx = Cand.Policy.DemandResIdx;
2532  break;
2533  case TopDepthReduce:
2534  Latency = Cand.SU->getDepth();
2535  break;
2536  case TopPathReduce:
2537  Latency = Cand.SU->getHeight();
2538  break;
2539  case BotHeightReduce:
2540  Latency = Cand.SU->getHeight();
2541  break;
2542  case BotPathReduce:
2543  Latency = Cand.SU->getDepth();
2544  break;
2545  }
2546  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2547  if (P.isValid())
2548  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2549  << ":" << P.getUnitInc() << " ";
2550  else
2551  dbgs() << " ";
2552  if (ResIdx)
2553  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2554  else
2555  dbgs() << " ";
2556  if (Latency)
2557  dbgs() << " " << Latency << " cycles ";
2558  else
2559  dbgs() << " ";
2560  dbgs() << '\n';
2561 }
2562 #endif
2563 
2564 /// Return true if this heuristic determines order.
2565 static bool tryLess(int TryVal, int CandVal,
2566  GenericSchedulerBase::SchedCandidate &TryCand,
2567  GenericSchedulerBase::SchedCandidate &Cand,
2568  GenericSchedulerBase::CandReason Reason) {
2569  if (TryVal < CandVal) {
2570  TryCand.Reason = Reason;
2571  return true;
2572  }
2573  if (TryVal > CandVal) {
2574  if (Cand.Reason > Reason)
2575  Cand.Reason = Reason;
2576  return true;
2577  }
2578  return false;
2579 }
2580 
2581 static bool tryGreater(int TryVal, int CandVal,
2582  GenericSchedulerBase::SchedCandidate &TryCand,
2583  GenericSchedulerBase::SchedCandidate &Cand,
2584  GenericSchedulerBase::CandReason Reason) {
2585  if (TryVal > CandVal) {
2586  TryCand.Reason = Reason;
2587  return true;
2588  }
2589  if (TryVal < CandVal) {
2590  if (Cand.Reason > Reason)
2591  Cand.Reason = Reason;
2592  return true;
2593  }
2594  return false;
2595 }
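// Editorial gloss on tryLess/tryGreater above (not in the original source):
// they implement a three-way comparison. A win for TryVal sets TryCand.Reason,
// so the caller adopts TryCand; a win for CandVal only improves Cand.Reason's
// priority and keeps Cand; a tie returns false so the next heuristic breaks it.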
2596 
2597 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2598  GenericSchedulerBase::SchedCandidate &Cand,
2599  SchedBoundary &Zone) {
2600  if (Zone.isTop()) {
2601  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2602  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2603  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2604  return true;
2605  }
2606  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2607  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2608  return true;
2609  } else {
2610  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2611  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2612  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2613  return true;
2614  }
2615  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2616  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2617  return true;
2618  }
2619  return false;
2620 }
2621 
2622 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2623  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2624  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2625 }
2626 
2627 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2628  tracePick(Cand.Reason, Cand.AtTop);
2629 }
2630 
2631 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2632  assert(dag->hasVRegLiveness() &&
2633  "(PreRA)GenericScheduler needs vreg liveness");
2634  DAG = static_cast<ScheduleDAGMILive*>(dag);
2635  SchedModel = DAG->getSchedModel();
2636  TRI = DAG->TRI;
2637 
2638  Rem.init(DAG, SchedModel);
2639  Top.init(DAG, SchedModel, &Rem);
2640  Bot.init(DAG, SchedModel, &Rem);
2641 
2642  // Initialize resource counts.
2643 
2644  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2645  // are disabled, then these HazardRecs will be disabled.
2646  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2647  if (!Top.HazardRec) {
2648  Top.HazardRec =
2649  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2650  Itin, DAG);
2651  }
2652  if (!Bot.HazardRec) {
2653  Bot.HazardRec =
2654  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2655  Itin, DAG);
2656  }
2657  TopCand.SU = nullptr;
2658  BotCand.SU = nullptr;
2659 }
2660 
2661 /// Initialize the per-region scheduling policy.
2662 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2663  MachineBasicBlock::iterator End,
2664  unsigned NumRegionInstrs) {
2665  const MachineFunction &MF = *Begin->getMF();
2666  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2667 
2668  // Avoid setting up the register pressure tracker for small regions to save
2669  // compile time. As a rough heuristic, only track pressure when the number of
2670  // schedulable instructions exceeds half the integer register file.
2671  RegionPolicy.ShouldTrackPressure = true;
2672  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2673  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2674  if (TLI->isTypeLegal(LegalIntVT)) {
2675  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2676  TLI->getRegClassFor(LegalIntVT));
2677  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2678  }
2679  }
2680 
2681  // For generic targets, we default to bottom-up, because it's simpler and more
2682  // compile-time optimizations have been implemented in that direction.
2683  RegionPolicy.OnlyBottomUp = true;
2684 
2685  // Allow the subtarget to override default policy.
2686  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2687 
2688  // After subtarget overrides, apply command line options.
2689  if (!EnableRegPressure)
2690  RegionPolicy.ShouldTrackPressure = false;
2691 
2692  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2693  // e.g. -misched-bottomup=false allows scheduling in both directions.
2694  assert((!ForceTopDown || !ForceBottomUp) &&
2695  "-misched-topdown incompatible with -misched-bottomup");
2696  if (ForceBottomUp.getNumOccurrences() > 0) {
2697  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2698  if (RegionPolicy.OnlyBottomUp)
2699  RegionPolicy.OnlyTopDown = false;
2700  }
2701  if (ForceTopDown.getNumOccurrences() > 0) {
2702  RegionPolicy.OnlyTopDown = ForceTopDown;
2703  if (RegionPolicy.OnlyTopDown)
2704  RegionPolicy.OnlyBottomUp = false;
2705  }
2706 }
2707 
2708 void GenericScheduler::dumpPolicy() const {
2709  // Cannot completely remove virtual function even in release mode.
2710 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2711  dbgs() << "GenericScheduler RegionPolicy: "
2712  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2713  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2714  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2715  << "\n";
2716 #endif
2717 }
2718 
2719 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2720 /// critical path by more cycles than it takes to drain the instruction buffer.
2721 /// We estimate an upper bound on in-flight instructions as:
2722 ///
2723 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2724 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2725 /// InFlightResources = InFlightIterations * LoopResources
2726 ///
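/// Illustrative numbers (editorial, not from the source): with CyclicPath = 4
/// cycles, AcyclicPath = 16 cycles and 6 micro-ops issued per iteration,
/// InFlightIterations = 16 / 4 = 4 and InFlightResources = 4 * 6 = 24
/// micro-ops; if the micro-op buffer holds fewer than 24, the loop is treated
/// as acyclic-latency limited.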
2727 /// TODO: Check execution resources in addition to IssueCount.
2728 void GenericScheduler::checkAcyclicLatency() {
2729  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2730  return;
2731 
2732  // Scaled number of cycles per loop iteration.
2733  unsigned IterCount =
2734  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2735  Rem.RemIssueCount);
2736  // Scaled acyclic critical path.
2737  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2738  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2739  unsigned InFlightCount =
2740  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2741  unsigned BufferLimit =
2742  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2743 
2744  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2745 
2746  DEBUG(dbgs() << "IssueCycles="
2747  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2748  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2749  << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2750  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2751  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2752  if (Rem.IsAcyclicLatencyLimited)
2753  dbgs() << " ACYCLIC LATENCY LIMIT\n");
2754 }
2755 
2756 void GenericScheduler::registerRoots() {
2757  Rem.CriticalPath = DAG->ExitSU.getDepth();
2758 
2759  // Some roots may not feed into ExitSU. Check all of them in case.
2760  for (const SUnit *SU : Bot.Available) {
2761  if (SU->getDepth() > Rem.CriticalPath)
2762  Rem.CriticalPath = SU->getDepth();
2763  }
2764  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2765  if (DumpCriticalPathLength) {
2766  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2767  }
2768 
2769  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2770  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2771  checkAcyclicLatency();
2772  }
2773 }
2774 
2775 static bool tryPressure(const PressureChange &TryP,
2776  const PressureChange &CandP,
2777  GenericSchedulerBase::SchedCandidate &TryCand,
2778  GenericSchedulerBase::SchedCandidate &Cand,
2779  GenericSchedulerBase::CandReason Reason,
2780  const TargetRegisterInfo *TRI,
2781  const MachineFunction &MF) {
2782  // If one candidate decreases and the other increases, go with it.
2783  // Invalid candidates have UnitInc==0.
2784  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2785  Reason)) {
2786  return true;
2787  }
2788  // Do not compare the magnitude of pressure changes between top and bottom
2789  // boundary.
2790  if (Cand.AtTop != TryCand.AtTop)
2791  return false;
2792 
2793  // If both candidates affect the same set in the same boundary, go with the
2794  // smallest increase.
2795  unsigned TryPSet = TryP.getPSetOrMax();
2796  unsigned CandPSet = CandP.getPSetOrMax();
2797  if (TryPSet == CandPSet) {
2798  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2799  Reason);
2800  }
2801 
2802  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2803  std::numeric_limits<int>::max();
2804 
2805  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2806  std::numeric_limits<int>::max();
2807 
2808  // If the candidates are decreasing pressure, reverse priority.
2809  if (TryP.getUnitInc() < 0)
2810  std::swap(TryRank, CandRank);
2811  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2812 }
2813 
2814 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2815  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2816 }
2817 
2818 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2819 /// their physreg def/use.
2820 ///
2821 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2822 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2823 /// with the operation that produces or consumes the physreg. We'll do this when
2824 /// regalloc has support for parallel copies.
2825 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2826  const MachineInstr *MI = SU->getInstr();
2827  if (!MI->isCopy())
2828  return 0;
2829 
2830  unsigned ScheduledOper = isTop ? 1 : 0;
2831  unsigned UnscheduledOper = isTop ? 0 : 1;
2832  // If we have already scheduled the physreg produce/consumer, immediately
2833  // schedule the copy.
2834  if (TargetRegisterInfo::isPhysicalRegister(
2835  MI->getOperand(ScheduledOper).getReg()))
2836  return 1;
2837  // If the physreg is at the boundary, defer it. Otherwise schedule it
2838  // immediately to free the dependent. We can hoist the copy later.
2839  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2840  if (TargetRegisterInfo::isPhysicalRegister(
2841  MI->getOperand(UnscheduledOper).getReg()))
2842  return AtBoundary ? -1 : 1;
2843  return 0;
2844 }
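// Editorial gloss (not in the original source): in a top-down zone a COPY
// whose source operand is a physical register returns +1 (keep it adjacent to
// the physreg def that is already placed); a COPY into a physical register
// returns -1 only when the node has no remaining successors in the region,
// and +1 otherwise. Bottom-up mirrors this with the operand roles swapped.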
2845 
2846 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2847  bool AtTop,
2848  const RegPressureTracker &RPTracker,
2849  RegPressureTracker &TempTracker) {
2850  Cand.SU = SU;
2851  Cand.AtTop = AtTop;
2852  if (DAG->isTrackingPressure()) {
2853  if (AtTop) {
2854  TempTracker.getMaxDownwardPressureDelta(
2855  Cand.SU->getInstr(),
2856  Cand.RPDelta,
2857  DAG->getRegionCriticalPSets(),
2858  DAG->getRegPressure().MaxSetPressure);
2859  } else {
2860  if (VerifyScheduling) {
2861  TempTracker.getMaxUpwardPressureDelta(
2862  Cand.SU->getInstr(),
2863  &DAG->getPressureDiff(Cand.SU),
2864  Cand.RPDelta,
2865  DAG->getRegionCriticalPSets(),
2866  DAG->getRegPressure().MaxSetPressure);
2867  } else {
2868  RPTracker.getUpwardPressureDelta(
2869  Cand.SU->getInstr(),
2870  DAG->getPressureDiff(Cand.SU),
2871  Cand.RPDelta,
2872  DAG->getRegionCriticalPSets(),
2873  DAG->getRegPressure().MaxSetPressure);
2874  }
2875  }
2876  }
2877  DEBUG(if (Cand.RPDelta.Excess.isValid())
2878  dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
2879  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2880  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2881 }
2882 
2883 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2884 /// hierarchical. This may be more efficient than a graduated cost model because
2885 /// we don't need to evaluate all aspects of the model for each node in the
2886 /// queue. But it's really done to make the heuristics easier to debug and
2887 /// statistically analyze.
2888 ///
2889 /// \param Cand provides the policy and current best candidate.
2890 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2891 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2892 /// if Cand is from a different zone than TryCand.
2893 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2894  SchedCandidate &TryCand,
2895  SchedBoundary *Zone) {
2896  // Initialize the candidate if needed.
2897  if (!Cand.isValid()) {
2898  TryCand.Reason = NodeOrder;
2899  return;
2900  }
2901 
2902  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2903  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2904  TryCand, Cand, PhysRegCopy))
2905  return;
2906 
2907  // Avoid exceeding the target's limit.
2908  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2909  Cand.RPDelta.Excess,
2910  TryCand, Cand, RegExcess, TRI,
2911  DAG->MF))
2912  return;
2913 
2914  // Avoid increasing the max critical pressure in the scheduled region.
2915  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2916  Cand.RPDelta.CriticalMax,
2917  TryCand, Cand, RegCritical, TRI,
2918  DAG->MF))
2919  return;
2920 
2921  // We only compare a subset of features when comparing nodes between
2922  // Top and Bottom boundary. Some properties are simply incomparable, in many
2923  // other instances we should only override the other boundary if something
2924  // is a clear good pick on one boundary. Skip heuristics that are more
2925  // "tie-breaking" in nature.
2926  bool SameBoundary = Zone != nullptr;
2927  if (SameBoundary) {
2928  // For loops that are acyclic path limited, aggressively schedule for
2929  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2930  // heuristics to take precedence.
2931  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2932  tryLatency(TryCand, Cand, *Zone))
2933  return;
2934 
2935  // Prioritize instructions that read unbuffered resources by stall cycles.
2936  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2937  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2938  return;
2939  }
2940 
2941  // Keep clustered nodes together to encourage downstream peephole
2942  // optimizations which may reduce resource requirements.
2943  //
2944  // This is a best effort to set things up for a post-RA pass. Optimizations
2945  // like generating loads of multiple registers should ideally be done within
2946  // the scheduler pass by combining the loads during DAG postprocessing.
2947  const SUnit *CandNextClusterSU =
2948  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2949  const SUnit *TryCandNextClusterSU =
2950  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2951  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2952  Cand.SU == CandNextClusterSU,
2953  TryCand, Cand, Cluster))
2954  return;
2955 
2956  if (SameBoundary) {
2957  // Weak edges are for clustering and other constraints.
2958  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2959  getWeakLeft(Cand.SU, Cand.AtTop),
2960  TryCand, Cand, Weak))
2961  return;
2962  }
2963 
2964  // Avoid increasing the max pressure of the entire region.
2965  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2966  Cand.RPDelta.CurrentMax,
2967  TryCand, Cand, RegMax, TRI,
2968  DAG->MF))
2969  return;
2970 
2971  if (SameBoundary) {
2972  // Avoid critical resource consumption and balance the schedule.
2973  TryCand.initResourceDelta(DAG, SchedModel);
2974  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2975  TryCand, Cand, ResourceReduce))
2976  return;
2977  if (tryGreater(TryCand.ResDelta.DemandedResources,
2978  Cand.ResDelta.DemandedResources,
2979  TryCand, Cand, ResourceDemand))
2980  return;
2981 
2982  // Avoid serializing long latency dependence chains.
2983  // For acyclic path limited loops, latency was already checked above.
2984  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2985  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2986  return;
2987 
2988  // Fall through to original instruction order.
2989  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2990  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2991  TryCand.Reason = NodeOrder;
2992  }
2993  }
2994 }
2995 
2996 /// Pick the best candidate from the queue.
2997 ///
2998 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2999 /// DAG building. To adjust for the current scheduling location we need to
3000 /// maintain the number of vreg uses remaining to be top-scheduled.
3001 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3002  const CandPolicy &ZonePolicy,
3003  const RegPressureTracker &RPTracker,
3004  SchedCandidate &Cand) {
3005  // getMaxPressureDelta temporarily modifies the tracker.
3006  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3007 
3008  ReadyQueue &Q = Zone.Available;
3009  for (SUnit *SU : Q) {
3010 
3011  SchedCandidate TryCand(ZonePolicy);
3012  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3013  // Pass SchedBoundary only when comparing nodes from the same boundary.
3014  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3015  tryCandidate(Cand, TryCand, ZoneArg);
3016  if (TryCand.Reason != NoCand) {
3017  // Initialize resource delta if needed in case future heuristics query it.
3018  if (TryCand.ResDelta == SchedResourceDelta())
3019  TryCand.initResourceDelta(DAG, SchedModel);
3020  Cand.setBest(TryCand);
3021  DEBUG(traceCandidate(Cand));
3022  }
3023  }
3024 }
3025 
3026 /// Pick the best candidate node from either the top or bottom queue.
3027 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3028  // Schedule as far as possible in the direction of no choice. This is most
3029  // efficient, but also provides the best heuristics for CriticalPSets.
3030  if (SUnit *SU = Bot.pickOnlyChoice()) {
3031  IsTopNode = false;
3032  tracePick(Only1, false);
3033  return SU;
3034  }
3035  if (SUnit *SU = Top.pickOnlyChoice()) {
3036  IsTopNode = true;
3037  tracePick(Only1, true);
3038  return SU;
3039  }
3040  // Set the bottom-up policy based on the state of the current bottom zone and
3041  // the instructions outside the zone, including the top zone.
3042  CandPolicy BotPolicy;
3043  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3044  // Set the top-down policy based on the state of the current top zone and
3045  // the instructions outside the zone, including the bottom zone.
3046  CandPolicy TopPolicy;
3047  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3048 
3049  // See if BotCand is still valid (because we previously scheduled from Top).
3050  DEBUG(dbgs() << "Picking from Bot:\n");
3051  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3052  BotCand.Policy != BotPolicy) {
3053  BotCand.reset(CandPolicy());
3054  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3055  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3056  } else {
3057  DEBUG(traceCandidate(BotCand));
3058 #ifndef NDEBUG
3059  if (VerifyScheduling) {
3060  SchedCandidate TCand;
3061  TCand.reset(CandPolicy());
3062  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3063  assert(TCand.SU == BotCand.SU &&
3064  "Last pick result should correspond to re-picking right now");
3065  }
3066 #endif
3067  }
3068 
3069  // Check if the top Q has a better candidate.
3070  DEBUG(dbgs() << "Picking from Top:\n");
3071  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3072  TopCand.Policy != TopPolicy) {
3073  TopCand.reset(CandPolicy());
3074  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3075  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3076  } else {
3077  DEBUG(traceCandidate(TopCand));
3078 #ifndef NDEBUG
3079  if (VerifyScheduling) {
3080  SchedCandidate TCand;
3081  TCand.reset(CandPolicy());
3082  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3083  assert(TCand.SU == TopCand.SU &&
3084  "Last pick result should correspond to re-picking right now");
3085  }
3086 #endif
3087  }
3088 
3089  // Pick best from BotCand and TopCand.
3090  assert(BotCand.isValid());
3091  assert(TopCand.isValid());
3092  SchedCandidate Cand = BotCand;
3093  TopCand.Reason = NoCand;
3094  tryCandidate(Cand, TopCand, nullptr);
3095  if (TopCand.Reason != NoCand) {
3096  Cand.setBest(TopCand);
3097  DEBUG(traceCandidate(Cand));
3098  }
3099 
3100  IsTopNode = Cand.AtTop;
3101  tracePick(Cand);
3102  return Cand.SU;
3103 }
3104 
3105 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3106 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3107  if (DAG->top() == DAG->bottom()) {
3108  assert(Top.Available.empty() && Top.Pending.empty() &&
3109  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3110  return nullptr;
3111  }
3112  SUnit *SU;
3113  do {
3114  if (RegionPolicy.OnlyTopDown) {
3115  SU = Top.pickOnlyChoice();
3116  if (!SU) {
3117  CandPolicy NoPolicy;
3118  TopCand.reset(NoPolicy);
3119  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3120  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3121  tracePick(TopCand);
3122  SU = TopCand.SU;
3123  }
3124  IsTopNode = true;
3125  } else if (RegionPolicy.OnlyBottomUp) {
3126  SU = Bot.pickOnlyChoice();
3127  if (!SU) {
3128  CandPolicy NoPolicy;
3129  BotCand.reset(NoPolicy);
3130  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3131  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3132  tracePick(BotCand);
3133  SU = BotCand.SU;
3134  }
3135  IsTopNode = false;
3136  } else {
3137  SU = pickNodeBidirectional(IsTopNode);
3138  }
3139  } while (SU->isScheduled);
3140 
3141  if (SU->isTopReady())
3142  Top.removeReady(SU);
3143  if (SU->isBottomReady())
3144  Bot.removeReady(SU);
3145 
3146  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3147  return SU;
3148 }
3149 
3150 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3151  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3152  if (!isTop)
3153  ++InsertPos;
3154  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3155 
3156  // Find already scheduled copies with a single physreg dependence and move
3157  // them just above the scheduled instruction.
3158  for (SDep &Dep : Deps) {
3159  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3160  continue;
3161  SUnit *DepSU = Dep.getSUnit();
3162  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3163  continue;
3164  MachineInstr *Copy = DepSU->getInstr();
3165  if (!Copy->isCopy())
3166  continue;
3167  DEBUG(dbgs() << " Rescheduling physreg copy ";
3168  Dep.getSUnit()->dump(DAG));
3169  DAG->moveInstruction(Copy, InsertPos);
3170  }
3171 }
3172 
3173 /// Update the scheduler's state after scheduling a node. This is the same node
3174 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3175 /// update its state based on the current cycle before MachineSchedStrategy
3176 /// does.
3177 ///
3178 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3179 /// them here. See comments in biasPhysRegCopy.
3180 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3181  if (IsTopNode) {
3182  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3183  Top.bumpNode(SU);
3184  if (SU->hasPhysRegUses)
3185  reschedulePhysRegCopies(SU, true);
3186  } else {
3187  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3188  Bot.bumpNode(SU);
3189  if (SU->hasPhysRegDefs)
3190  reschedulePhysRegCopies(SU, false);
3191  }
3192 }
3193 
3194 /// Create the standard converging machine scheduler. This will be used as the
3195 /// default scheduler if the target does not set a default.
3196 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3197  ScheduleDAGMILive *DAG =
3198  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3199  // Register DAG post-processors.
3200  //
3201  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3202  // data and pass it to later mutations. Have a single mutation that gathers
3203  // the interesting nodes in one pass.
3204  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3205  return DAG;
3206 }
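
The DAG->addMutation() call above is the post-processor hook the FIXME refers to: a ScheduleDAGMutation runs after the DAG is built and before scheduling, and may add or adjust edges. A minimal sketch of such a mutation follows; ExampleClusterMutation and the usage line are illustrative only, not code from this file.

// Sketch (hypothetical names): a DAG post-processor attached via addMutation().
namespace {
class ExampleClusterMutation : public ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    // Mutations that need ScheduleDAGMI facilities typically downcast here,
    // then walk DAG->SUnits and add weak edges to influence the schedule.
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
    (void)DAG;
  }
};
} // end anonymous namespace
// Usage sketch: DAG->addMutation(llvm::make_unique<ExampleClusterMutation>());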
3207 
3208 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3209  return createGenericSchedLive(C);
3210 }
3211 
3212 static MachineSchedRegistry
3213 GenericSchedRegistry("converge", "Standard converging scheduler.",
3214                      createConveringSched);
3215 
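
The registration above is what makes the converging strategy selectable with -misched=converge. Any strategy can be plugged into the same registry; the sketch below uses hypothetical names (createExampleSched, ExampleSchedRegistry) and simply reuses GenericScheduler to stay self-contained.

// Sketch: registering an alternative strategy under -misched=example-sched.
static ScheduleDAGInstrs *createExampleSched(MachineSchedContext *C) {
  // A real registration would construct its own MachineSchedStrategy here.
  return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
}
static MachineSchedRegistry
ExampleSchedRegistry("example-sched", "Illustrative registration only.",
                     createExampleSched);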
3216 //===----------------------------------------------------------------------===//
3217 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3218 //===----------------------------------------------------------------------===//
3219 
3220 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3221  DAG = Dag;
3222  SchedModel = DAG->getSchedModel();
3223  TRI = DAG->TRI;
3224 
3225  Rem.init(DAG, SchedModel);
3226  Top.init(DAG, SchedModel, &Rem);
3227  BotRoots.clear();
3228 
3229  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3230  // or are disabled, then these HazardRecs will be disabled.
3231  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3232  if (!Top.HazardRec) {
3233  Top.HazardRec =
3234        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3235  Itin, DAG);
3236  }
3237 }
3238 
3239 void PostGenericScheduler::registerRoots() {
3240  Rem.CriticalPath = DAG->ExitSU.getDepth();
3241 
3242  // Some roots may not feed into ExitSU. Check all of them in case.
3243  for (const SUnit *SU : BotRoots) {
3244  if (SU->getDepth() > Rem.CriticalPath)
3245  Rem.CriticalPath = SU->getDepth();
3246  }
3247  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3248  if (DumpCriticalPathLength) {
3249  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3250  }
3251 }
3252 
3253 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3254 ///
3255 /// \param Cand provides the policy and current best candidate.
3256 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3257 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3258  SchedCandidate &TryCand) {
3259  // Initialize the candidate if needed.
3260  if (!Cand.isValid()) {
3261  TryCand.Reason = NodeOrder;
3262  return;
3263  }
3264 
3265  // Prioritize instructions that read unbuffered resources by stall cycles.
3266  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3267  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3268  return;
3269 
3270  // Keep clustered nodes together.
3271  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3272  Cand.SU == DAG->getNextClusterSucc(),
3273  TryCand, Cand, Cluster))
3274  return;
3275 
3276  // Avoid critical resource consumption and balance the schedule.
3277  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3278  TryCand, Cand, ResourceReduce))
3279  return;
3280  if (tryGreater(TryCand.ResDelta.DemandedResources,
3281  Cand.ResDelta.DemandedResources,
3282  TryCand, Cand, ResourceDemand))
3283  return;
3284 
3285  // Avoid serializing long latency dependence chains.
3286  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3287  return;
3288  }
3289 
3290  // Fall through to original instruction order.
3291  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3292  TryCand.Reason = NodeOrder;
3293 }
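
The heuristics above are tried in priority order: the first helper that can distinguish the candidates records the deciding reason and returns, and ties fall through to the next check, ending with original instruction order. A simplified sketch of the tryLess contract (the real tryLess/tryGreater helpers are defined earlier in this file) is shown below; tryLessSketch is an illustrative name.

// Sketch of the ordering contract used by the cascade above.
static bool tryLessSketch(int TryVal, int CandVal,
                          GenericSchedulerBase::SchedCandidate &TryCand,
                          GenericSchedulerBase::SchedCandidate &Cand,
                          GenericSchedulerBase::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;   // TryCand wins on this heuristic.
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;    // Cand keeps winning; record the stronger reason.
    return true;
  }
  return false;                // Tie: defer to the next heuristic.
}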
3294 
3295 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3296  ReadyQueue &Q = Top.Available;
3297  for (SUnit *SU : Q) {
3298  SchedCandidate TryCand(Cand.Policy);
3299  TryCand.SU = SU;
3300  TryCand.AtTop = true;
3301  TryCand.initResourceDelta(DAG, SchedModel);
3302  tryCandidate(Cand, TryCand);
3303  if (TryCand.Reason != NoCand) {
3304  Cand.setBest(TryCand);
3305  DEBUG(traceCandidate(Cand));
3306  }
3307  }
3308 }
3309 
3310 /// Pick the next node to schedule.
3311 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3312  if (DAG->top() == DAG->bottom()) {
3313  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3314  return nullptr;
3315  }
3316  SUnit *SU;
3317  do {
3318  SU = Top.pickOnlyChoice();
3319  if (SU) {
3320  tracePick(Only1, true);
3321  } else {
3322  CandPolicy NoPolicy;
3323  SchedCandidate TopCand(NoPolicy);
3324  // Set the top-down policy based on the state of the current top zone and
3325  // the instructions outside the zone, including the bottom zone.
3326  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3327  pickNodeFromQueue(TopCand);
3328  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3329  tracePick(TopCand);
3330  SU = TopCand.SU;
3331  }
3332  } while (SU->isScheduled);
3333 
3334  IsTopNode = true;
3335  Top.removeReady(SU);
3336 
3337  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3338  return SU;
3339 }
3340 
3341 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3342 /// scheduled/remaining flags in the DAG nodes.
3343 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3344  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3345  Top.bumpNode(SU);
3346 }
3347 
3348 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3349  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3350  /*RemoveKillFlags=*/true);
3351 }
3352 
3353 //===----------------------------------------------------------------------===//
3354 // ILP Scheduler. Currently for experimental analysis of heuristics.
3355 //===----------------------------------------------------------------------===//
3356 
3357 namespace {
3358 
3359 /// \brief Order nodes by the ILP metric.
3360 struct ILPOrder {
3361  const SchedDFSResult *DFSResult = nullptr;
3362  const BitVector *ScheduledTrees = nullptr;
3363  bool MaximizeILP;
3364 
3365  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3366 
3367  /// \brief Apply a less-than relation on node priority.
3368  ///
3369  /// (Return true if A comes after B in the Q.)
3370  bool operator()(const SUnit *A, const SUnit *B) const {
3371  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3372  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3373  if (SchedTreeA != SchedTreeB) {
3374  // Unscheduled trees have lower priority.
3375  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3376  return ScheduledTrees->test(SchedTreeB);
3377 
3378  // Trees with shallower connections have lower priority.
3379  if (DFSResult->getSubtreeLevel(SchedTreeA)
3380  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3381  return DFSResult->getSubtreeLevel(SchedTreeA)
3382  < DFSResult->getSubtreeLevel(SchedTreeB);
3383  }
3384  }
3385  if (MaximizeILP)
3386  return DFSResult->getILP(A) < DFSResult->getILP(B);
3387  else
3388  return DFSResult->getILP(A) > DFSResult->getILP(B);
3389  }
3390 };
3391 
3392 /// \brief Schedule based on the ILP metric.
3393 class ILPScheduler : public MachineSchedStrategy {
3394  ScheduleDAGMILive *DAG = nullptr;
3395  ILPOrder Cmp;
3396 
3397  std::vector<SUnit*> ReadyQ;
3398 
3399 public:
3400  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3401 
3402  void initialize(ScheduleDAGMI *dag) override {
3403  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3404  DAG = static_cast<ScheduleDAGMILive*>(dag);
3405  DAG->computeDFSResult();
3406  Cmp.DFSResult = DAG->getDFSResult();
3407  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3408  ReadyQ.clear();
3409  }
3410 
3411  void registerRoots() override {
3412  // Restore the heap in ReadyQ with the updated DFS results.
3413  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3414  }
3415 
3416  /// Implement MachineSchedStrategy interface.
3417  /// -----------------------------------------
3418 
3419  /// Callback to select the highest priority node from the ready Q.
3420  SUnit *pickNode(bool &IsTopNode) override {
3421  if (ReadyQ.empty()) return nullptr;
3422  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3423  SUnit *SU = ReadyQ.back();
3424  ReadyQ.pop_back();
3425  IsTopNode = false;
3426  DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3427  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3428  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3429  << DAG->getDFSResult()->getSubtreeLevel(
3430  DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3431  << "Scheduling " << *SU->getInstr());
3432  return SU;
3433  }
3434 
3435  /// \brief Scheduler callback to notify that a new subtree is scheduled.
3436  void scheduleTree(unsigned SubtreeID) override {
3437  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3438  }
3439 
3440  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3441  /// DFSResults, and resort the priority Q.
3442  void schedNode(SUnit *SU, bool IsTopNode) override {
3443  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3444  }
3445 
3446  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3447 
3448  void releaseBottomNode(SUnit *SU) override {
3449  ReadyQ.push_back(SU);
3450  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3451  }
3452 };
3453 
3454 } // end anonymous namespace
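
ILPScheduler keeps ReadyQ as an explicit heap so registerRoots() and scheduleTree() can rebuild priorities in place once DFS results change. The pop protocol used by pickNode() above is the standard one; a self-contained sketch with ints standing in for SUnits:

// Sketch: pop_heap moves the best element (per the comparator) to the back,
// which is then read with back() and removed with pop_back(), exactly as
// ILPScheduler::pickNode() does with ReadyQ and ILPOrder.
#include <algorithm>
#include <vector>
int popBest(std::vector<int> &ReadyQ) {
  // ReadyQ must already satisfy the heap property (std::make_heap/push_heap).
  std::pop_heap(ReadyQ.begin(), ReadyQ.end()); // default less-than: max first
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  return Best;
}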
3455 
3456 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3457  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3458 }
3459 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3460  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3461 }
3462 
3463 static MachineSchedRegistry ILPMaxRegistry(
3464  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3465 static MachineSchedRegistry ILPMinRegistry(
3466  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3467 
3468 //===----------------------------------------------------------------------===//
3469 // Machine Instruction Shuffler for Correctness Testing
3470 //===----------------------------------------------------------------------===//
3471 
3472 #ifndef NDEBUG
3473 namespace {
3474 
3475 /// Apply a less-than relation on the node order, which corresponds to the
3476 /// instruction order prior to scheduling. IsReverse implements greater-than.
3477 template<bool IsReverse>
3478 struct SUnitOrder {
3479  bool operator()(SUnit *A, SUnit *B) const {
3480  if (IsReverse)
3481  return A->NodeNum > B->NodeNum;
3482  else
3483  return A->NodeNum < B->NodeNum;
3484  }
3485 };
3486 
3487 /// Reorder instructions as much as possible.
3488 class InstructionShuffler : public MachineSchedStrategy {
3489  bool IsAlternating;
3490  bool IsTopDown;
3491 
3492  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3493  // gives nodes with a higher number higher priority, causing the latest
3494  // instructions to be scheduled first (see the sketch after this class).
3495  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3496  TopQ;
3497 
3498  // When scheduling bottom-up, use greater-than as the queue priority.
3499  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3500  BottomQ;
3501 
3502 public:
3503  InstructionShuffler(bool alternate, bool topdown)
3504  : IsAlternating(alternate), IsTopDown(topdown) {}
3505 
3506  void initialize(ScheduleDAGMI*) override {
3507  TopQ.clear();
3508  BottomQ.clear();
3509  }
3510 
3511  /// Implement MachineSchedStrategy interface.
3512  /// -----------------------------------------
3513 
3514  SUnit *pickNode(bool &IsTopNode) override {
3515  SUnit *SU;
3516  if (IsTopDown) {
3517  do {
3518  if (TopQ.empty()) return nullptr;
3519  SU = TopQ.top();
3520  TopQ.pop();
3521  } while (SU->isScheduled);
3522  IsTopNode = true;
3523  } else {
3524  do {
3525  if (BottomQ.empty()) return nullptr;
3526  SU = BottomQ.top();
3527  BottomQ.pop();
3528  } while (SU->isScheduled);
3529  IsTopNode = false;
3530  }
3531  if (IsAlternating)
3532  IsTopDown = !IsTopDown;
3533  return SU;
3534  }
3535 
3536  void schedNode(SUnit *SU, bool IsTopNode) override {}
3537 
3538  void releaseTopNode(SUnit *SU) override {
3539  TopQ.push(SU);
3540  }
3541  void releaseBottomNode(SUnit *SU) override {
3542  BottomQ.push(SU);
3543  }
3544 };
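
As the TopQ comment above notes, a less-than comparator in a max-heap style queue surfaces the largest key first. Below is a standard-library illustration of that behaviour (llvm::PriorityQueue documents itself as behaving like std::priority_queue); demoTop is an illustrative name, not part of this file.

// Sketch: with a less-than comparator the highest value pops first, which is
// why SUnitOrder<false> makes TopQ prefer the highest-numbered (latest) SUnit.
#include <functional>
#include <queue>
#include <vector>
int demoTop() {
  std::priority_queue<int, std::vector<int>, std::less<int>> Q;
  Q.push(1);
  Q.push(3);
  Q.push(2);
  return Q.top(); // 3: the highest-numbered element comes out first.
}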
3545 
3546 } // end anonymous namespace
3547 
3548 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3549  bool Alternate = !ForceTopDown && !ForceBottomUp;
3550  bool TopDown = !ForceBottomUp;
3551  assert((TopDown || !ForceTopDown) &&
3552  "-misched-topdown incompatible with -misched-bottomup");
3553  return new ScheduleDAGMILive(
3554  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3555 }
3556 
3557 static MachineSchedRegistry ShufflerRegistry(
3558  "shuffle", "Shuffle machine instructions alternating directions",
3559  createInstructionShuffler);
3560 #endif // !NDEBUG
3561 
3562 //===----------------------------------------------------------------------===//
3563 // GraphWriter support for ScheduleDAGMILive.
3564 //===----------------------------------------------------------------------===//
3565 
3566 #ifndef NDEBUG
3567 namespace llvm {
3568 
3569 template<> struct GraphTraits<
3570  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3571 
3572 template<>
3573 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3574   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3575 
3576  static std::string getGraphName(const ScheduleDAG *G) {
3577  return G->MF.getName();
3578  }
3579 
3580  static bool renderGraphFromBottomUp() {
3581  return true;
3582  }
3583 
3584  static bool isNodeHidden(const SUnit *Node) {
3585  if (ViewMISchedCutoff == 0)
3586  return false;
3587  return (Node->Preds.size() > ViewMISchedCutoff
3588  || Node->Succs.size() > ViewMISchedCutoff);
3589  }
3590 
3591  /// If you want to override the dot attributes printed for a particular
3592  /// edge, override this method.
3593  static std::string getEdgeAttributes(const SUnit *Node,
3594  SUnitIterator EI,
3595  const ScheduleDAG *Graph) {
3596  if (EI.isArtificialDep())
3597  return "color=cyan,style=dashed";
3598  if (EI.isCtrlDep())
3599  return "color=blue,style=dashed";
3600  return "";
3601  }
3602 
3603  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3604  std::string Str;
3605  raw_string_ostream SS(Str);
3606  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3607  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3608  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3609  SS << "SU:" << SU->NodeNum;
3610  if (DFS)
3611  SS << " I:" << DFS->getNumInstrs(SU);
3612  return SS.str();
3613  }
3614 
3615  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3616  return G->getGraphNodeLabel(SU);
3617  }
3618 
3619  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3620  std::string Str("shape=Mrecord");
3621  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3622  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3623  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3624  if (DFS) {
3625  Str += ",style=filled,fillcolor=\"#";
3626  Str += DOT::getColorString(DFS->getSubtreeID(N));
3627  Str += '"';
3628  }
3629  return Str;
3630  }
3631 };
3632 
3633 } // end namespace llvm
3634 #endif // NDEBUG
3635 
3636 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3637 /// rendered using 'dot'.
3638 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3639 #ifndef NDEBUG
3640  ViewGraph(this, Name, false, Title);
3641 #else
3642  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3643  << "systems with Graphviz or gv!\n";
3644 #endif // NDEBUG
3645 }
3646 
3647 /// Out-of-line implementation with no arguments is handy for gdb.
3648 void ScheduleDAGMI::viewGraph() {
3649  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3650 }