LLVM  6.0.0svn
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/MachineScheduler.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <limits>
64 #include <memory>
65 #include <string>
66 #include <tuple>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "machine-scheduler"
73 
74 namespace llvm {
75 
76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
77  cl::desc("Force top-down list scheduling"));
78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
79  cl::desc("Force bottom-up list scheduling"));
80 cl::opt<bool>
81 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
82  cl::desc("Print critical path length to stdout"));
83 
84 } // end namespace llvm
85 
86 #ifndef NDEBUG
87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
88  cl::desc("Pop up a window to show MISched dags after they are processed"));
89 
90 /// In some situations a few uninteresting nodes depend on nearly all other
91 /// nodes in the graph, provide a cutoff to hide them.
92 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
93  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
94 
95 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
96  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
97 
98 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
99  cl::desc("Only schedule this function"));
100 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
101  cl::desc("Only schedule this MBB#"));
102 #else
103 static bool ViewMISchedDAGs = false;
104 #endif // NDEBUG
105 
106 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
107 /// size of the ready lists.
108 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
109  cl::desc("Limit ready list to N instructions"), cl::init(256));
110 
111 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
112  cl::desc("Enable register pressure scheduling."), cl::init(true));
113 
114 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
115  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
116 
117 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
118  cl::desc("Enable memop clustering."),
119  cl::init(true));
120 
121 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
122  cl::desc("Verify machine instrs before and after machine scheduling"));
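// Illustrative note (not part of the pass): in builds with assertions enabled,
// the hidden flags above can be combined on an llc invocation to narrow down a
// scheduling problem. A minimal sketch, with placeholder function/block names
// and input file:
//
//   llc -verify-misched -misched-only-func=foo -misched-only-block=2 \
//       -misched-limit=128 input.ll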
123 
124 // DAG subtrees must have at least this many nodes.
125 static const unsigned MinSubtreeSize = 8;
126 
127 // Pin the vtables to this file.
128 void MachineSchedStrategy::anchor() {}
129 
130 void ScheduleDAGMutation::anchor() {}
131 
132 //===----------------------------------------------------------------------===//
133 // Machine Instruction Scheduling Pass and Registry
134 //===----------------------------------------------------------------------===//
135 
136 MachineSchedContext::MachineSchedContext() {
137  RegClassInfo = new RegisterClassInfo();
138 }
139 
140 MachineSchedContext::~MachineSchedContext() {
141  delete RegClassInfo;
142 }
143 
144 namespace {
145 
146 /// Base class for a machine scheduler class that can run at any point.
147 class MachineSchedulerBase : public MachineSchedContext,
148  public MachineFunctionPass {
149 public:
150  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
151 
152  void print(raw_ostream &O, const Module* = nullptr) const override;
153 
154 protected:
155  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
156 };
157 
158 /// MachineScheduler runs after coalescing and before register allocation.
159 class MachineScheduler : public MachineSchedulerBase {
160 public:
161  MachineScheduler();
162 
163  void getAnalysisUsage(AnalysisUsage &AU) const override;
164 
165  bool runOnMachineFunction(MachineFunction&) override;
166 
167  static char ID; // Class identification, replacement for typeinfo
168 
169 protected:
170  ScheduleDAGInstrs *createMachineScheduler();
171 };
172 
173 /// PostMachineScheduler runs shortly before code emission.
174 class PostMachineScheduler : public MachineSchedulerBase {
175 public:
176  PostMachineScheduler();
177 
178  void getAnalysisUsage(AnalysisUsage &AU) const override;
179 
180  bool runOnMachineFunction(MachineFunction&) override;
181 
182  static char ID; // Class identification, replacement for typeinfo
183 
184 protected:
185  ScheduleDAGInstrs *createPostMachineScheduler();
186 };
187 
188 } // end anonymous namespace
189 
190 char MachineScheduler::ID = 0;
191 
192 char &llvm::MachineSchedulerID = MachineScheduler::ID;
193 
194 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
195  "Machine Instruction Scheduler", false, false)
200 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
201  "Machine Instruction Scheduler", false, false)
202 
203 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
204  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
205 }
206 
207 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
208  AU.setPreservesCFG();
209  AU.addRequiredID(MachineDominatorsID);
210  AU.addRequired<MachineLoopInfo>();
211  AU.addRequired<AAResultsWrapperPass>();
212  AU.addRequired<TargetPassConfig>();
213  AU.addRequired<SlotIndexes>();
214  AU.addPreserved<SlotIndexes>();
215  AU.addRequired<LiveIntervals>();
216  AU.addPreserved<LiveIntervals>();
217  MachineFunctionPass::getAnalysisUsage(AU);
218 }
219 
220 char PostMachineScheduler::ID = 0;
221 
222 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
223 
224 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
225  "PostRA Machine Instruction Scheduler", false, false)
226 
227 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
228  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
229 }
230 
231 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
232  AU.setPreservesCFG();
237 }
238 
239 MachinePassRegistry MachineSchedRegistry::Registry;
240 
241 /// A dummy default scheduler factory indicates whether the scheduler
242 /// is overridden on the command line.
243 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
244  return nullptr;
245 }
246 
247 /// MachineSchedOpt allows command line selection of the scheduler.
248 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
249  RegisterPassParser<MachineSchedRegistry>>
250 MachineSchedOpt("misched",
251  cl::init(&useDefaultMachineSched), cl::Hidden,
252  cl::desc("Machine instruction scheduler to use"));
253 
254 static MachineSchedRegistry
255 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
256  useDefaultMachineSched);
257 
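// Illustrative sketch (not from this file): an alternative scheduler can be
// made selectable through -misched=<name> by pairing a factory function with a
// MachineSchedRegistry entry, mirroring DefaultSchedRegistry above. The name
// "custom" and the factory below are hypothetical:
//
//   static ScheduleDAGInstrs *createCustomSched(MachineSchedContext *C) {
//     return createGenericSchedLive(C); // or any other ScheduleDAGInstrs
//   }
//   static MachineSchedRegistry
//   CustomSchedRegistry("custom", "Custom scheduler example", createCustomSched);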
259  "enable-misched",
260  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
261  cl::Hidden);
262 
264  "enable-post-misched",
265  cl::desc("Enable the post-ra machine instruction scheduling pass."),
266  cl::init(true), cl::Hidden);
267 
268 /// Decrement this iterator until reaching the top or a non-debug instr.
269 static MachineBasicBlock::const_iterator
270 priorNonDebug(MachineBasicBlock::const_iterator I,
271  MachineBasicBlock::const_iterator Beg) {
272  assert(I != Beg && "reached the top of the region, cannot decrement");
273  while (--I != Beg) {
274  if (!I->isDebugValue())
275  break;
276  }
277  return I;
278 }
279 
280 /// Non-const version.
281 static MachineBasicBlock::iterator
282 priorNonDebug(MachineBasicBlock::iterator I,
283  MachineBasicBlock::iterator Beg) {
284  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
285  .getNonConstIterator();
286 }
287 
288 /// If this iterator is a debug value, increment until reaching the End or a
289 /// non-debug instruction.
290 static MachineBasicBlock::const_iterator
291 nextIfDebug(MachineBasicBlock::const_iterator I,
292  MachineBasicBlock::const_iterator End) {
293  for(; I != End; ++I) {
294  if (!I->isDebugValue())
295  break;
296  }
297  return I;
298 }
299 
300 /// Non-const version.
301 static MachineBasicBlock::iterator
302 nextIfDebug(MachineBasicBlock::iterator I,
303  MachineBasicBlock::iterator End) {
304  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
305  .getNonConstIterator();
306 }
307 
308 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
309 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
310  // Select the scheduler, or set the default.
311  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
312  if (Ctor != useDefaultMachineSched)
313  return Ctor(this);
314 
315  // Get the default scheduler set by the target for this function.
316  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
317  if (Scheduler)
318  return Scheduler;
319 
320  // Default to GenericScheduler.
321  return createGenericSchedLive(this);
322 }
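// Illustrative sketch (not from this file): a target takes control of pre-RA
// scheduling by overriding TargetPassConfig::createMachineScheduler, which is
// the hook queried above. "MyTargetPassConfig" is a placeholder class name:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     return createGenericSchedLive(C); // or a target-specific ScheduleDAGMILive
//   }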
323 
324 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
325 /// the caller. We don't have a command line option to override the postRA
326 /// scheduler. The Target must configure it.
327 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
328  // Get the postRA scheduler set by the target for this function.
329  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
330  if (Scheduler)
331  return Scheduler;
332 
333  // Default to GenericScheduler.
334  return createGenericSchedPostRA(this);
335 }
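// Illustrative sketch (not from this file): the post-RA path is enabled per
// subtarget and configured per target, e.g. by overriding
// TargetSubtargetInfo::enablePostRAScheduler (checked in runOnMachineFunction
// below) together with TargetPassConfig::createPostMachineScheduler (queried
// above). "MyTargetSubtarget" is a placeholder:
//
//   bool MyTargetSubtarget::enablePostRAScheduler() const { return true; }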
336 
337 /// Top-level MachineScheduler pass driver.
338 ///
339 /// Visit blocks in function order. Divide each block into scheduling regions
340 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
341 /// consistent with the DAG builder, which traverses the interior of the
342 /// scheduling regions bottom-up.
343 ///
344 /// This design avoids exposing scheduling boundaries to the DAG builder,
345 /// simplifying the DAG builder's support for "special" target instructions.
346 /// At the same time the design allows target schedulers to operate across
347 /// scheduling boundaries, for example to bundle the boundary instructions
348 /// without reordering them. This creates complexity, because the target
349 /// scheduler must update the RegionBegin and RegionEnd positions cached by
350 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
351 /// design would be to split blocks at scheduling boundaries, but LLVM has a
352 /// general bias against block splitting purely for implementation simplicity.
353 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
354  if (skipFunction(*mf.getFunction()))
355  return false;
356 
357  if (EnableMachineSched.getNumOccurrences()) {
358  if (!EnableMachineSched)
359  return false;
360  } else if (!mf.getSubtarget().enableMachineScheduler())
361  return false;
362 
363  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
364 
365  // Initialize the context of the pass.
366  MF = &mf;
367  MLI = &getAnalysis<MachineLoopInfo>();
368  MDT = &getAnalysis<MachineDominatorTree>();
369  PassConfig = &getAnalysis<TargetPassConfig>();
370  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
371 
372  LIS = &getAnalysis<LiveIntervals>();
373 
374  if (VerifyScheduling) {
375  DEBUG(LIS->dump());
376  MF->verify(this, "Before machine scheduling.");
377  }
378  RegClassInfo->runOnMachineFunction(*MF);
379 
380  // Instantiate the selected scheduler for this target, function, and
381  // optimization level.
382  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
383  scheduleRegions(*Scheduler, false);
384 
385  DEBUG(LIS->dump());
386  if (VerifyScheduling)
387  MF->verify(this, "After machine scheduling.");
388  return true;
389 }
390 
391 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
392  if (skipFunction(*mf.getFunction()))
393  return false;
394 
395  if (EnablePostRAMachineSched.getNumOccurrences()) {
396  if (!EnablePostRAMachineSched)
397  return false;
398  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
399  DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
400  return false;
401  }
402  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
403 
404  // Initialize the context of the pass.
405  MF = &mf;
406  MLI = &getAnalysis<MachineLoopInfo>();
407  PassConfig = &getAnalysis<TargetPassConfig>();
408 
409  if (VerifyScheduling)
410  MF->verify(this, "Before post machine scheduling.");
411 
412  // Instantiate the selected scheduler for this target, function, and
413  // optimization level.
414  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
415  scheduleRegions(*Scheduler, true);
416 
417  if (VerifyScheduling)
418  MF->verify(this, "After post machine scheduling.");
419  return true;
420 }
421 
422 /// Return true if the given instruction should not be included in a scheduling
423 /// region.
424 ///
425 /// MachineScheduler does not currently support scheduling across calls. To
426 /// handle calls, the DAG builder needs to be modified to create register
427 /// anti/output dependencies on the registers clobbered by the call's regmask
428 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
429 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
430 /// the boundary, but there would be no benefit to postRA scheduling across
431 /// calls this late anyway.
432 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
433  MachineBasicBlock *MBB,
434  MachineFunction *MF,
435  const TargetInstrInfo *TII) {
436  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
437 }
438 
439 /// A region of an MBB for scheduling.
440 namespace {
441 struct SchedRegion {
442  /// RegionBegin is the first instruction in the scheduling region, and
443  /// RegionEnd is either MBB->end() or the scheduling boundary after the
444  /// last instruction in the scheduling region. These iterators cannot refer
445  /// to instructions outside of the identified scheduling region because
446  /// those may be reordered before scheduling this region.
447  MachineBasicBlock::iterator RegionBegin;
448  MachineBasicBlock::iterator RegionEnd;
449  unsigned NumRegionInstrs;
450 
451  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
452  unsigned N) :
453  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
454 };
455 } // end anonymous namespace
456 
457 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
458 
459 static void
460 getSchedRegions(MachineBasicBlock *MBB,
461  MBBRegionsVector &Regions,
462  bool RegionsTopDown) {
463  MachineFunction *MF = MBB->getParent();
464  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
465 
466  MachineBasicBlock::iterator I = nullptr;
467  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
468  RegionEnd != MBB->begin(); RegionEnd = I) {
469 
470  // Avoid decrementing RegionEnd for blocks with no terminator.
471  if (RegionEnd != MBB->end() ||
472  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
473  --RegionEnd;
474  }
475 
476  // The next region starts above the previous region. Look backward in the
477  // instruction stream until we find the nearest boundary.
478  unsigned NumRegionInstrs = 0;
479  I = RegionEnd;
480  for (;I != MBB->begin(); --I) {
481  MachineInstr &MI = *std::prev(I);
482  if (isSchedBoundary(&MI, &*MBB, MF, TII))
483  break;
484  if (!MI.isDebugValue())
485  // MBB::size() uses instr_iterator to count. Here we need a bundle to
486  // count as a single instruction.
487  ++NumRegionInstrs;
488  }
489 
490  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
491  }
492 
493  if (RegionsTopDown)
494  std::reverse(Regions.begin(), Regions.end());
495 }
496 
497 /// Main driver for both MachineScheduler and PostMachineScheduler.
498 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
499  bool FixKillFlags) {
500  // Visit all machine basic blocks.
501  //
502  // TODO: Visit blocks in global postorder or postorder within the bottom-up
503  // loop tree. Then we can optionally compute global RegPressure.
504  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
505  MBB != MBBEnd; ++MBB) {
506 
507  Scheduler.startBlock(&*MBB);
508 
509 #ifndef NDEBUG
510  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
511  continue;
512  if (SchedOnlyBlock.getNumOccurrences()
513  && (int)SchedOnlyBlock != MBB->getNumber())
514  continue;
515 #endif
516 
517  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
518  // points to the scheduling boundary at the bottom of the region. The DAG
519  // does not include RegionEnd, but the region does (i.e. the next
520  // RegionEnd is above the previous RegionBegin). If the current block has
521  // no terminator then RegionEnd == MBB->end() for the bottom region.
522  //
523  // All the regions of MBB are first found and stored in MBBRegions, which
524  // will be processed (MBB) top-down if initialized with true.
525  //
526  // The Scheduler may insert instructions during either schedule() or
527  // exitRegion(), even for empty regions. So the local iterators 'I' and
528  // 'RegionEnd' are invalid across these calls. Instructions must not be
529  // added to other regions than the current one without updating MBBRegions.
530 
531  MBBRegionsVector MBBRegions;
532  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
533  for (MBBRegionsVector::iterator R = MBBRegions.begin();
534  R != MBBRegions.end(); ++R) {
535  MachineBasicBlock::iterator I = R->RegionBegin;
536  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
537  unsigned NumRegionInstrs = R->NumRegionInstrs;
538 
539  // Notify the scheduler of the region, even if we may skip scheduling
540  // it. Perhaps it still needs to be bundled.
541  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
542 
543  // Skip empty scheduling regions (0 or 1 schedulable instructions).
544  if (I == RegionEnd || I == std::prev(RegionEnd)) {
545  // Close the current region. Bundle the terminator if needed.
546  // This invalidates 'RegionEnd' and 'I'.
547  Scheduler.exitRegion();
548  continue;
549  }
550  DEBUG(dbgs() << "********** MI Scheduling **********\n");
551  DEBUG(dbgs() << MF->getName()
552  << ":BB#" << MBB->getNumber() << " " << MBB->getName()
553  << "\n From: " << *I << " To: ";
554  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
555  else dbgs() << "End";
556  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
557  if (DumpCriticalPathLength) {
558  errs() << MF->getName();
559  errs() << ":BB# " << MBB->getNumber();
560  errs() << " " << MBB->getName() << " \n";
561  }
562 
563  // Schedule a region: possibly reorder instructions.
564  // This invalidates the original region iterators.
565  Scheduler.schedule();
566 
567  // Close the current region.
568  Scheduler.exitRegion();
569  }
570  Scheduler.finishBlock();
571  // FIXME: Ideally, no further passes should rely on kill flags. However,
572  // thumb2 size reduction is currently an exception, so the PostMIScheduler
573  // needs to do this.
574  if (FixKillFlags)
575  Scheduler.fixupKills(*MBB);
576  }
577  Scheduler.finalizeSchedule();
578 }
579 
580 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
581  // unimplemented
582 }
583 
584 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
586  dbgs() << "Queue " << Name << ": ";
587  for (const SUnit *SU : Queue)
588  dbgs() << SU->NodeNum << " ";
589  dbgs() << "\n";
590 }
591 #endif
592 
593 //===----------------------------------------------------------------------===//
594 // ScheduleDAGMI - Basic machine instruction scheduling. This is
595 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
596 // virtual registers.
597 // ===----------------------------------------------------------------------===/
598 
599 // Provide a vtable anchor.
600 ScheduleDAGMI::~ScheduleDAGMI() = default;
601 
602 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
603  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
604 }
605 
606 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
607  if (SuccSU != &ExitSU) {
608  // Do not use WillCreateCycle, it assumes SD scheduling.
609  // If Pred is reachable from Succ, then the edge creates a cycle.
610  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
611  return false;
612  Topo.AddPred(SuccSU, PredDep.getSUnit());
613  }
614  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
615  // Return true regardless of whether a new edge needed to be inserted.
616  return true;
617 }
618 
619 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
620 /// NumPredsLeft reaches zero, release the successor node.
621 ///
622 /// FIXME: Adjust SuccSU height based on MinLatency.
623 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
624  SUnit *SuccSU = SuccEdge->getSUnit();
625 
626  if (SuccEdge->isWeak()) {
627  --SuccSU->WeakPredsLeft;
628  if (SuccEdge->isCluster())
629  NextClusterSucc = SuccSU;
630  return;
631  }
632 #ifndef NDEBUG
633  if (SuccSU->NumPredsLeft == 0) {
634  dbgs() << "*** Scheduling failed! ***\n";
635  SuccSU->dump(this);
636  dbgs() << " has been released too many times!\n";
637  llvm_unreachable(nullptr);
638  }
639 #endif
640  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
641  // CurrCycle may have advanced since then.
642  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
643  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
644 
645  --SuccSU->NumPredsLeft;
646  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
647  SchedImpl->releaseTopNode(SuccSU);
648 }
649 
650 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
651 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
652  for (SDep &Succ : SU->Succs)
653  releaseSucc(SU, &Succ);
654 }
655 
656 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
657 /// NumSuccsLeft reaches zero, release the predecessor node.
658 ///
659 /// FIXME: Adjust PredSU height based on MinLatency.
660 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
661  SUnit *PredSU = PredEdge->getSUnit();
662 
663  if (PredEdge->isWeak()) {
664  --PredSU->WeakSuccsLeft;
665  if (PredEdge->isCluster())
666  NextClusterPred = PredSU;
667  return;
668  }
669 #ifndef NDEBUG
670  if (PredSU->NumSuccsLeft == 0) {
671  dbgs() << "*** Scheduling failed! ***\n";
672  PredSU->dump(this);
673  dbgs() << " has been released too many times!\n";
674  llvm_unreachable(nullptr);
675  }
676 #endif
677  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
678  // CurrCycle may have advanced since then.
679  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
680  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
681 
682  --PredSU->NumSuccsLeft;
683  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
684  SchedImpl->releaseBottomNode(PredSU);
685 }
686 
687 /// releasePredecessors - Call releasePred on each of SU's predecessors.
688 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
689  for (SDep &Pred : SU->Preds)
690  releasePred(SU, &Pred);
691 }
692 
693 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
694  ScheduleDAGInstrs::startBlock(bb);
695  SchedImpl->enterMBB(bb);
696 }
697 
698 void ScheduleDAGMI::finishBlock() {
699  SchedImpl->leaveMBB();
700  ScheduleDAGInstrs::finishBlock();
701 }
702 
703 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
704 /// crossing a scheduling boundary. [begin, end) includes all instructions in
705 /// the region, including the boundary itself and single-instruction regions
706 /// that don't get scheduled.
707 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
708  MachineBasicBlock::iterator begin,
709  MachineBasicBlock::iterator end,
710  unsigned regioninstrs)
711 {
712  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
713 
714  SchedImpl->initPolicy(begin, end, regioninstrs);
715 }
716 
717 /// This is normally called from the main scheduler loop but may also be invoked
718 /// by the scheduling strategy to perform additional code motion.
719 void ScheduleDAGMI::moveInstruction(
720  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
721  // Advance RegionBegin if the first instruction moves down.
722  if (&*RegionBegin == MI)
723  ++RegionBegin;
724 
725  // Update the instruction stream.
726  BB->splice(InsertPos, BB, MI);
727 
728  // Update LiveIntervals
729  if (LIS)
730  LIS->handleMove(*MI, /*UpdateFlags=*/true);
731 
732  // Recede RegionBegin if an instruction moves above the first.
733  if (RegionBegin == InsertPos)
734  RegionBegin = MI;
735 }
736 
737 bool ScheduleDAGMI::checkSchedLimit() {
738 #ifndef NDEBUG
739  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
740  CurrentTop = CurrentBottom;
741  return false;
742  }
743  ++NumInstrsScheduled;
744 #endif
745  return true;
746 }
747 
748 /// Per-region scheduling driver, called back from
749 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
750 /// does not consider liveness or register pressure. It is useful for PostRA
751 /// scheduling and potentially other custom schedulers.
752 void ScheduleDAGMI::schedule() {
753  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
754  DEBUG(SchedImpl->dumpPolicy());
755 
756  // Build the DAG.
757  buildSchedGraph(AA);
758 
759  Topo.InitDAGTopologicalSorting();
760 
761  postprocessDAG();
762 
763  SmallVector<SUnit*, 8> TopRoots, BotRoots;
764  findRootsAndBiasEdges(TopRoots, BotRoots);
765 
766  // Initialize the strategy before modifying the DAG.
767  // This may initialize a DFSResult to be used for queue priority.
768  SchedImpl->initialize(this);
769 
770  DEBUG(
771  if (EntrySU.getInstr() != nullptr)
772  EntrySU.dumpAll(this);
773  for (const SUnit &SU : SUnits)
774  SU.dumpAll(this);
775  if (ExitSU.getInstr() != nullptr)
776  ExitSU.dumpAll(this);
777  );
778  if (ViewMISchedDAGs) viewGraph();
779 
780  // Initialize ready queues now that the DAG and priority data are finalized.
781  initQueues(TopRoots, BotRoots);
782 
783  bool IsTopNode = false;
784  while (true) {
785  DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
786  SUnit *SU = SchedImpl->pickNode(IsTopNode);
787  if (!SU) break;
788 
789  assert(!SU->isScheduled && "Node already scheduled");
790  if (!checkSchedLimit())
791  break;
792 
793  MachineInstr *MI = SU->getInstr();
794  if (IsTopNode) {
795  assert(SU->isTopReady() && "node still has unscheduled dependencies");
796  if (&*CurrentTop == MI)
797  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
798  else
799  moveInstruction(MI, CurrentTop);
800  } else {
801  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
802  MachineBasicBlock::iterator priorII =
803  priorNonDebug(CurrentBottom, CurrentTop);
804  if (&*priorII == MI)
805  CurrentBottom = priorII;
806  else {
807  if (&*CurrentTop == MI)
808  CurrentTop = nextIfDebug(++CurrentTop, priorII);
809  moveInstruction(MI, CurrentBottom);
810  CurrentBottom = MI;
811  }
812  }
813  // Notify the scheduling strategy before updating the DAG.
814  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
815  // runs, it can then use the accurate ReadyCycle time to determine whether
816  // newly released nodes can move to the readyQ.
817  SchedImpl->schedNode(SU, IsTopNode);
818 
819  updateQueues(SU, IsTopNode);
820  }
821  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
822 
823  placeDebugValues();
824 
825  DEBUG({
826  unsigned BBNum = begin()->getParent()->getNumber();
827  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
828  dumpSchedule();
829  dbgs() << '\n';
830  });
831 }
832 
833 /// Apply each ScheduleDAGMutation step in order.
834 void ScheduleDAGMI::postprocessDAG() {
835  for (auto &m : Mutations)
836  m->apply(this);
837 }
838 
839 void ScheduleDAGMI::
840 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
841  SmallVectorImpl<SUnit*> &BotRoots) {
842  for (SUnit &SU : SUnits) {
843  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
844 
845  // Order predecessors so DFSResult follows the critical path.
846  SU.biasCriticalPath();
847 
848  // A SUnit is ready to top schedule if it has no predecessors.
849  if (!SU.NumPredsLeft)
850  TopRoots.push_back(&SU);
851  // A SUnit is ready to bottom schedule if it has no successors.
852  if (!SU.NumSuccsLeft)
853  BotRoots.push_back(&SU);
854  }
855  ExitSU.biasCriticalPath();
856 }
857 
858 /// Identify DAG roots and setup scheduler queues.
859 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
860  ArrayRef<SUnit*> BotRoots) {
861  NextClusterSucc = nullptr;
862  NextClusterPred = nullptr;
863 
864  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
865  //
866  // Nodes with unreleased weak edges can still be roots.
867  // Release top roots in forward order.
868  for (SUnit *SU : TopRoots)
869  SchedImpl->releaseTopNode(SU);
870 
871  // Release bottom roots in reverse order so the higher priority nodes appear
872  // first. This is more natural and slightly more efficient.
873  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
874  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
875  SchedImpl->releaseBottomNode(*I);
876  }
877 
878  releaseSuccessors(&EntrySU);
879  releasePredecessors(&ExitSU);
880 
881  SchedImpl->registerRoots();
882 
883  // Advance past initial DebugValues.
884  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
885  CurrentBottom = RegionEnd;
886 }
887 
888 /// Update scheduler queues after scheduling an instruction.
889 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
890  // Release dependent instructions for scheduling.
891  if (IsTopNode)
892  releaseSuccessors(SU);
893  else
894  releasePredecessors(SU);
895 
896  SU->isScheduled = true;
897 }
898 
899 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
900 void ScheduleDAGMI::placeDebugValues() {
901  // If first instruction was a DBG_VALUE then put it back.
902  if (FirstDbgValue) {
903  BB->splice(RegionBegin, BB, FirstDbgValue);
904  RegionBegin = FirstDbgValue;
905  }
906 
907  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
908  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
909  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
910  MachineInstr *DbgValue = P.first;
911  MachineBasicBlock::iterator OrigPrevMI = P.second;
912  if (&*RegionBegin == DbgValue)
913  ++RegionBegin;
914  BB->splice(++OrigPrevMI, BB, DbgValue);
915  if (OrigPrevMI == std::prev(RegionEnd))
916  RegionEnd = DbgValue;
917  }
918  DbgValues.clear();
919  FirstDbgValue = nullptr;
920 }
921 
922 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
923 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
924  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
925  if (SUnit *SU = getSUnit(&(*MI)))
926  SU->dump(this);
927  else
928  dbgs() << "Missing SUnit\n";
929  }
930 }
931 #endif
932 
933 //===----------------------------------------------------------------------===//
934 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
935 // preservation.
936 //===----------------------------------------------------------------------===//
937 
938 ScheduleDAGMILive::~ScheduleDAGMILive() {
939  delete DFSResult;
940 }
941 
942 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
943  const MachineInstr &MI = *SU.getInstr();
944  for (const MachineOperand &MO : MI.operands()) {
945  if (!MO.isReg())
946  continue;
947  if (!MO.readsReg())
948  continue;
949  if (TrackLaneMasks && !MO.isUse())
950  continue;
951 
952  unsigned Reg = MO.getReg();
953  if (!TargetRegisterInfo::isVirtualRegister(Reg))
954  continue;
955 
956  // Ignore re-defs.
957  if (TrackLaneMasks) {
958  bool FoundDef = false;
959  for (const MachineOperand &MO2 : MI.operands()) {
960  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
961  FoundDef = true;
962  break;
963  }
964  }
965  if (FoundDef)
966  continue;
967  }
968 
969  // Record this local VReg use.
970  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
971  for (; UI != VRegUses.end(); ++UI) {
972  if (UI->SU == &SU)
973  break;
974  }
975  if (UI == VRegUses.end())
976  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
977  }
978 }
979 
980 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
981 /// crossing a scheduling boundary. [begin, end) includes all instructions in
982 /// the region, including the boundary itself and single-instruction regions
983 /// that don't get scheduled.
984 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
985  MachineBasicBlock::iterator begin,
986  MachineBasicBlock::iterator end,
987  unsigned regioninstrs)
988 {
989  // ScheduleDAGMI initializes SchedImpl's per-region policy.
990  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
991 
992  // For convenience remember the end of the liveness region.
993  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
994 
995  SUPressureDiffs.clear();
996 
997  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
998  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
999 
1000  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1001  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1002 }
1003 
1004 // Setup the register pressure trackers for the top scheduled and bottom
1005 // scheduled regions.
1006 void ScheduleDAGMILive::initRegPressure() {
1007  VRegUses.clear();
1008  VRegUses.setUniverse(MRI.getNumVirtRegs());
1009  for (SUnit &SU : SUnits)
1010  collectVRegUses(SU);
1011 
1012  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1013  ShouldTrackLaneMasks, false);
1014  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1015  ShouldTrackLaneMasks, false);
1016 
1017  // Close the RPTracker to finalize live ins.
1018  RPTracker.closeRegion();
1019 
1020  DEBUG(RPTracker.dump());
1021 
1022  // Initialize the live ins and live outs.
1023  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1024  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1025 
1026  // Close one end of the tracker so we can call
1027  // getMaxUpward/DownwardPressureDelta before advancing across any
1028  // instructions. This converts currently live regs into live ins/outs.
1029  TopRPTracker.closeTop();
1030  BotRPTracker.closeBottom();
1031 
1032  BotRPTracker.initLiveThru(RPTracker);
1033  if (!BotRPTracker.getLiveThru().empty()) {
1034  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1035  DEBUG(dbgs() << "Live Thru: ";
1036  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1037  };
1038 
1039  // For each live out vreg reduce the pressure change associated with other
1040  // uses of the same vreg below the live-out reaching def.
1041  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1042 
1043  // Account for liveness generated by the region boundary.
1044  if (LiveRegionEnd != RegionEnd) {
1045  SmallVector<RegisterMaskPair, 8> LiveUses;
1046  BotRPTracker.recede(&LiveUses);
1047  updatePressureDiffs(LiveUses);
1048  }
1049 
1050  DEBUG(
1051  dbgs() << "Top Pressure:\n";
1052  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1053  dbgs() << "Bottom Pressure:\n";
1054  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1055  );
1056 
1057  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
1058 
1059  // Cache the list of excess pressure sets in this region. This will also track
1060  // the max pressure in the scheduled code for these sets.
1061  RegionCriticalPSets.clear();
1062  const std::vector<unsigned> &RegionPressure =
1063  RPTracker.getPressure().MaxSetPressure;
1064  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1065  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1066  if (RegionPressure[i] > Limit) {
1067  DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1068  << " Limit " << Limit
1069  << " Actual " << RegionPressure[i] << "\n");
1070  RegionCriticalPSets.push_back(PressureChange(i));
1071  }
1072  }
1073  DEBUG(dbgs() << "Excess PSets: ";
1074  for (const PressureChange &RCPS : RegionCriticalPSets)
1075  dbgs() << TRI->getRegPressureSetName(
1076  RCPS.getPSet()) << " ";
1077  dbgs() << "\n");
1078 }
1079 
1080 void ScheduleDAGMILive::
1081 updateScheduledPressure(const SUnit *SU,
1082  const std::vector<unsigned> &NewMaxPressure) {
1083  const PressureDiff &PDiff = getPressureDiff(SU);
1084  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1085  for (const PressureChange &PC : PDiff) {
1086  if (!PC.isValid())
1087  break;
1088  unsigned ID = PC.getPSet();
1089  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1090  ++CritIdx;
1091  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1092  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1093  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1094  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1095  }
1096  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1097  if (NewMaxPressure[ID] >= Limit - 2) {
1098  DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1099  << NewMaxPressure[ID]
1100  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1101  << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1102  }
1103  }
1104 }
1105 
1106 /// Update the PressureDiff array for liveness after scheduling this
1107 /// instruction.
1108 void ScheduleDAGMILive::updatePressureDiffs(
1109  ArrayRef<RegisterMaskPair> LiveUses) {
1110  for (const RegisterMaskPair &P : LiveUses) {
1111  unsigned Reg = P.RegUnit;
1112  /// FIXME: Currently assuming single-use physregs.
1113  if (!TRI->isVirtualRegister(Reg))
1114  continue;
1115 
1116  if (ShouldTrackLaneMasks) {
1117  // If the register has just become live then other uses won't change
1118  // this fact anymore => decrement pressure.
1119  // If the register has just become dead then other uses make it come
1120  // back to life => increment pressure.
1121  bool Decrement = P.LaneMask.any();
1122 
1123  for (const VReg2SUnit &V2SU
1124  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1125  SUnit &SU = *V2SU.SU;
1126  if (SU.isScheduled || &SU == &ExitSU)
1127  continue;
1128 
1129  PressureDiff &PDiff = getPressureDiff(&SU);
1130  PDiff.addPressureChange(Reg, Decrement, &MRI);
1131  DEBUG(
1132  dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1133  << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1134  << ' ' << *SU.getInstr();
1135  dbgs() << " to ";
1136  PDiff.dump(*TRI);
1137  );
1138  }
1139  } else {
1140  assert(P.LaneMask.any());
1141  DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
1142  // This may be called before CurrentBottom has been initialized. However,
1143  // BotRPTracker must have a valid position. We want the value live into the
1144  // instruction or live out of the block, so ask for the previous
1145  // instruction's live-out.
1146  const LiveInterval &LI = LIS->getInterval(Reg);
1147  VNInfo *VNI;
1148  MachineBasicBlock::const_iterator I =
1149  nextIfDebug(BotRPTracker.getPos(), BB->end());
1150  if (I == BB->end())
1151  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1152  else {
1153  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1154  VNI = LRQ.valueIn();
1155  }
1156  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1157  assert(VNI && "No live value at use.");
1158  for (const VReg2SUnit &V2SU
1159  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1160  SUnit *SU = V2SU.SU;
1161  // If this use comes before the reaching def, it cannot be a last use,
1162  // so decrease its pressure change.
1163  if (!SU->isScheduled && SU != &ExitSU) {
1164  LiveQueryResult LRQ =
1165  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1166  if (LRQ.valueIn() == VNI) {
1167  PressureDiff &PDiff = getPressureDiff(SU);
1168  PDiff.addPressureChange(Reg, true, &MRI);
1169  DEBUG(
1170  dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1171  << *SU->getInstr();
1172  dbgs() << " to ";
1173  PDiff.dump(*TRI);
1174  );
1175  }
1176  }
1177  }
1178  }
1179  }
1180 }
1181 
1182 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1183 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1184 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1185 ///
1186 /// This is a skeletal driver, with all the functionality pushed into helpers,
1187 /// so that it can be easily extended by experimental schedulers. Generally,
1188 /// implementing MachineSchedStrategy should be sufficient to implement a new
1189 /// scheduling algorithm. However, if a scheduler further subclasses
1190 /// ScheduleDAGMILive then it will want to override this virtual method in order
1191 /// to update any specialized state.
1192 void ScheduleDAGMILive::schedule() {
1193  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1194  DEBUG(SchedImpl->dumpPolicy());
1195  buildDAGWithRegPressure();
1196 
1197  Topo.InitDAGTopologicalSorting();
1198 
1199  postprocessDAG();
1200 
1201  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202  findRootsAndBiasEdges(TopRoots, BotRoots);
1203 
1204  // Initialize the strategy before modifying the DAG.
1205  // This may initialize a DFSResult to be used for queue priority.
1206  SchedImpl->initialize(this);
1207 
1208  DEBUG(
1209  if (EntrySU.getInstr() != nullptr)
1210  EntrySU.dumpAll(this);
1211  for (const SUnit &SU : SUnits) {
1212  SU.dumpAll(this);
1213  if (ShouldTrackPressure) {
1214  dbgs() << " Pressure Diff : ";
1215  getPressureDiff(&SU).dump(*TRI);
1216  }
1217  dbgs() << " Single Issue : ";
1218  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1219  SchedModel.mustEndGroup(SU.getInstr()))
1220  dbgs() << "true;";
1221  else
1222  dbgs() << "false;";
1223  dbgs() << '\n';
1224  }
1225  if (ExitSU.getInstr() != nullptr)
1226  ExitSU.dumpAll(this);
1227  );
1228  if (ViewMISchedDAGs) viewGraph();
1229 
1230  // Initialize ready queues now that the DAG and priority data are finalized.
1231  initQueues(TopRoots, BotRoots);
1232 
1233  bool IsTopNode = false;
1234  while (true) {
1235  DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1236  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1237  if (!SU) break;
1238 
1239  assert(!SU->isScheduled && "Node already scheduled");
1240  if (!checkSchedLimit())
1241  break;
1242 
1243  scheduleMI(SU, IsTopNode);
1244 
1245  if (DFSResult) {
1246  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1247  if (!ScheduledTrees.test(SubtreeID)) {
1248  ScheduledTrees.set(SubtreeID);
1249  DFSResult->scheduleTree(SubtreeID);
1250  SchedImpl->scheduleTree(SubtreeID);
1251  }
1252  }
1253 
1254  // Notify the scheduling strategy after updating the DAG.
1255  SchedImpl->schedNode(SU, IsTopNode);
1256 
1257  updateQueues(SU, IsTopNode);
1258  }
1259  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1260 
1261  placeDebugValues();
1262 
1263  DEBUG({
1264  unsigned BBNum = begin()->getParent()->getNumber();
1265  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1266  dumpSchedule();
1267  dbgs() << '\n';
1268  });
1269 }
1270 
1271 /// Build the DAG and setup three register pressure trackers.
1272 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1273  if (!ShouldTrackPressure) {
1274  RPTracker.reset();
1275  RegionCriticalPSets.clear();
1276  buildSchedGraph(AA);
1277  return;
1278  }
1279 
1280  // Initialize the register pressure tracker used by buildSchedGraph.
1281  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1282  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1283 
1284  // Account for liveness generated by the region boundary.
1285  if (LiveRegionEnd != RegionEnd)
1286  RPTracker.recede();
1287 
1288  // Build the DAG, and compute current register pressure.
1289  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1290 
1291  // Initialize top/bottom trackers after computing region pressure.
1292  initRegPressure();
1293 }
1294 
1295 void ScheduleDAGMILive::computeDFSResult() {
1296  if (!DFSResult)
1297  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1298  DFSResult->clear();
1299  ScheduledTrees.clear();
1300  DFSResult->resize(SUnits.size());
1301  DFSResult->compute(SUnits);
1302  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1303 }
1304 
1305 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1306 /// only provides the critical path for single block loops. To handle loops that
1307 /// span blocks, we could use the vreg path latencies provided by
1308 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1309 /// available for use in the scheduler.
1310 ///
1311 /// The cyclic path estimation identifies a def-use pair that crosses the back
1312 /// edge and considers the depth and height of the nodes. For example, consider
1313 /// the following instruction sequence where each instruction has unit latency
1314 /// and defines an eponymous virtual register:
1315 ///
1316 /// a->b(a,c)->c(b)->d(c)->exit
1317 ///
1318 /// The cyclic critical path is two cycles: b->c->b
1319 /// The acyclic critical path is four cycles: a->b->c->d->exit
1320 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1321 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1322 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1323 /// LiveInDepth = depth(b) = len(a->b) = 1
1324 ///
1325 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1326 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1327 /// CyclicCriticalPath = min(2, 2) = 2
1328 ///
1329 /// This could be relevant to PostRA scheduling, but is currently implemented
1330 /// assuming LiveIntervals.
1331 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1332  // This only applies to single block loops.
1333  if (!BB->isSuccessor(BB))
1334  return 0;
1335 
1336  unsigned MaxCyclicLatency = 0;
1337  // Visit each live out vreg def to find def/use pairs that cross iterations.
1338  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1339  unsigned Reg = P.RegUnit;
1340  if (!TRI->isVirtualRegister(Reg))
1341  continue;
1342  const LiveInterval &LI = LIS->getInterval(Reg);
1343  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1344  if (!DefVNI)
1345  continue;
1346 
1347  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1348  const SUnit *DefSU = getSUnit(DefMI);
1349  if (!DefSU)
1350  continue;
1351 
1352  unsigned LiveOutHeight = DefSU->getHeight();
1353  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1354  // Visit all local users of the vreg def.
1355  for (const VReg2SUnit &V2SU
1356  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1357  SUnit *SU = V2SU.SU;
1358  if (SU == &ExitSU)
1359  continue;
1360 
1361  // Only consider uses of the phi.
1362  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1363  if (!LRQ.valueIn()->isPHIDef())
1364  continue;
1365 
1366  // Assume that a path spanning two iterations is a cycle, which could
1367  // overestimate in strange cases. This allows cyclic latency to be
1368  // estimated as the minimum slack of the vreg's depth or height.
1369  unsigned CyclicLatency = 0;
1370  if (LiveOutDepth > SU->getDepth())
1371  CyclicLatency = LiveOutDepth - SU->getDepth();
1372 
1373  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1374  if (LiveInHeight > LiveOutHeight) {
1375  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1376  CyclicLatency = LiveInHeight - LiveOutHeight;
1377  } else
1378  CyclicLatency = 0;
1379 
1380  DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1381  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1382  if (CyclicLatency > MaxCyclicLatency)
1383  MaxCyclicLatency = CyclicLatency;
1384  }
1385  }
1386  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1387  return MaxCyclicLatency;
1388 }
1389 
1390 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1391 /// the Top RP tracker in case the region beginning has changed.
1392 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1393  ArrayRef<SUnit*> BotRoots) {
1394  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1395  if (ShouldTrackPressure) {
1396  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1397  TopRPTracker.setPos(CurrentTop);
1398  }
1399 }
1400 
1401 /// Move an instruction and update register pressure.
1402 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1403  // Move the instruction to its new location in the instruction stream.
1404  MachineInstr *MI = SU->getInstr();
1405 
1406  if (IsTopNode) {
1407  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1408  if (&*CurrentTop == MI)
1409  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1410  else {
1411  moveInstruction(MI, CurrentTop);
1412  TopRPTracker.setPos(MI);
1413  }
1414 
1415  if (ShouldTrackPressure) {
1416  // Update top scheduled pressure.
1417  RegisterOperands RegOpers;
1418  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1419  if (ShouldTrackLaneMasks) {
1420  // Adjust liveness and add missing dead+read-undef flags.
1421  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1422  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1423  } else {
1424  // Adjust for missing dead-def flags.
1425  RegOpers.detectDeadDefs(*MI, *LIS);
1426  }
1427 
1428  TopRPTracker.advance(RegOpers);
1429  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1430  DEBUG(
1431  dbgs() << "Top Pressure:\n";
1432  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1433  );
1434 
1435  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1436  }
1437  } else {
1438  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1439  MachineBasicBlock::iterator priorII =
1440  priorNonDebug(CurrentBottom, CurrentTop);
1441  if (&*priorII == MI)
1442  CurrentBottom = priorII;
1443  else {
1444  if (&*CurrentTop == MI) {
1445  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1446  TopRPTracker.setPos(CurrentTop);
1447  }
1448  moveInstruction(MI, CurrentBottom);
1449  CurrentBottom = MI;
1450  }
1451  if (ShouldTrackPressure) {
1452  RegisterOperands RegOpers;
1453  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1454  if (ShouldTrackLaneMasks) {
1455  // Adjust liveness and add missing dead+read-undef flags.
1456  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1457  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1458  } else {
1459  // Adjust for missing dead-def flags.
1460  RegOpers.detectDeadDefs(*MI, *LIS);
1461  }
1462 
1463  BotRPTracker.recedeSkipDebugValues();
1464  SmallVector<RegisterMaskPair, 8> LiveUses;
1465  BotRPTracker.recede(RegOpers, &LiveUses);
1466  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1467  DEBUG(
1468  dbgs() << "Bottom Pressure:\n";
1469  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1470  );
1471 
1472  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1473  updatePressureDiffs(LiveUses);
1474  }
1475  }
1476 }
1477 
1478 //===----------------------------------------------------------------------===//
1479 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1480 //===----------------------------------------------------------------------===//
1481 
1482 namespace {
1483 
1484 /// \brief Post-process the DAG to create cluster edges between neighboring
1485 /// loads or between neighboring stores.
1486 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1487  struct MemOpInfo {
1488  SUnit *SU;
1489  unsigned BaseReg;
1490  int64_t Offset;
1491 
1492  MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1493  : SU(su), BaseReg(reg), Offset(ofs) {}
1494 
1495  bool operator<(const MemOpInfo&RHS) const {
1496  return std::tie(BaseReg, Offset, SU->NodeNum) <
1497  std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1498  }
1499  };
1500 
1501  const TargetInstrInfo *TII;
1502  const TargetRegisterInfo *TRI;
1503  bool IsLoad;
1504 
1505 public:
1506  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1507  const TargetRegisterInfo *tri, bool IsLoad)
1508  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1509 
1510  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1511 
1512 protected:
1513  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1514 };
1515 
1516 class StoreClusterMutation : public BaseMemOpClusterMutation {
1517 public:
1518  StoreClusterMutation(const TargetInstrInfo *tii,
1519  const TargetRegisterInfo *tri)
1520  : BaseMemOpClusterMutation(tii, tri, false) {}
1521 };
1522 
1523 class LoadClusterMutation : public BaseMemOpClusterMutation {
1524 public:
1525  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1526  : BaseMemOpClusterMutation(tii, tri, true) {}
1527 };
1528 
1529 } // end anonymous namespace
1530 
1531 namespace llvm {
1532 
1533 std::unique_ptr<ScheduleDAGMutation>
1534 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1535  const TargetRegisterInfo *TRI) {
1536  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1537  : nullptr;
1538 }
1539 
1540 std::unique_ptr<ScheduleDAGMutation>
1541 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1542  const TargetRegisterInfo *TRI) {
1543  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1544  : nullptr;
1545 }
1546 
1547 } // end namespace llvm
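// Illustrative sketch (not from this file): targets that want load/store
// clustering install these mutations from their createMachineScheduler hook,
// subject to the -misched-cluster flag handled above, roughly as:
//
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));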
1548 
1549 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1550  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1551  SmallVector<MemOpInfo, 32> MemOpRecords;
1552  for (SUnit *SU : MemOps) {
1553  unsigned BaseReg;
1554  int64_t Offset;
1555  if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1556  MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1557  }
1558  if (MemOpRecords.size() < 2)
1559  return;
1560 
1561  std::sort(MemOpRecords.begin(), MemOpRecords.end());
1562  unsigned ClusterLength = 1;
1563  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1564  SUnit *SUa = MemOpRecords[Idx].SU;
1565  SUnit *SUb = MemOpRecords[Idx+1].SU;
1566  if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1567  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1568  ClusterLength) &&
1569  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1570  DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1571  << SUb->NodeNum << ")\n");
1572  // Copy successor edges from SUa to SUb. Interleaving computation
1573  // dependent on SUa can prevent load combining due to register reuse.
1574  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1575  // loads should have effectively the same inputs.
1576  for (const SDep &Succ : SUa->Succs) {
1577  if (Succ.getSUnit() == SUb)
1578  continue;
1579  DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
1580  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1581  }
1582  ++ClusterLength;
1583  } else
1584  ClusterLength = 1;
1585  }
1586 }
1587 
1588 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1589 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1590  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1591 
1592  // Map DAG NodeNum to store chain ID.
1593  DenseMap<unsigned, unsigned> StoreChainIDs;
1594  // Map each store chain to a set of dependent MemOps.
1595  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1596  for (SUnit &SU : DAG->SUnits) {
1597  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1598  (!IsLoad && !SU.getInstr()->mayStore()))
1599  continue;
1600 
1601  unsigned ChainPredID = DAG->SUnits.size();
1602  for (const SDep &Pred : SU.Preds) {
1603  if (Pred.isCtrl()) {
1604  ChainPredID = Pred.getSUnit()->NodeNum;
1605  break;
1606  }
1607  }
1608  // Check if this chain-like pred has been seen
1609  // before. ChainPredID==MaxNodeID at the top of the schedule.
1610  unsigned NumChains = StoreChainDependents.size();
1611  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1612  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1613  if (Result.second)
1614  StoreChainDependents.resize(NumChains + 1);
1615  StoreChainDependents[Result.first->second].push_back(&SU);
1616  }
1617 
1618  // Iterate over the store chains.
1619  for (auto &SCD : StoreChainDependents)
1620  clusterNeighboringMemOps(SCD, DAG);
1621 }
1622 
1623 //===----------------------------------------------------------------------===//
1624 // CopyConstrain - DAG post-processing to encourage copy elimination.
1625 //===----------------------------------------------------------------------===//
1626 
1627 namespace {
1628 
1629 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1630 /// the one use that defines the copy's source vreg, most likely an induction
1631 /// variable increment.
1632 class CopyConstrain : public ScheduleDAGMutation {
1633  // Transient state.
1634  SlotIndex RegionBeginIdx;
1635 
1636  // RegionEndIdx is the slot index of the last non-debug instruction in the
1637  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1638  SlotIndex RegionEndIdx;
1639 
1640 public:
1641  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1642 
1643  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1644 
1645 protected:
1646  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1647 };
1648 
1649 } // end anonymous namespace
1650 
1651 namespace llvm {
1652 
1653 std::unique_ptr<ScheduleDAGMutation>
1654 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1655  const TargetRegisterInfo *TRI) {
1656  return llvm::make_unique<CopyConstrain>(TII, TRI);
1657 }
1658 
1659 } // end namespace llvm
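// Illustrative sketch (not shown in this excerpt): the generic pre-RA
// scheduler created by createGenericSchedLive installs this mutation on the
// DAG it builds, roughly as:
//
//   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));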
1660 
1661 /// constrainLocalCopy handles two possibilities:
1662 /// 1) Local src:
1663 /// I0: = dst
1664 /// I1: src = ...
1665 /// I2: = dst
1666 /// I3: dst = src (copy)
1667 /// (create pred->succ edges I0->I1, I2->I1)
1668 ///
1669 /// 2) Local copy:
1670 /// I0: dst = src (copy)
1671 /// I1: = dst
1672 /// I2: src = ...
1673 /// I3: = dst
1674 /// (create pred->succ edges I1->I2, I3->I2)
1675 ///
1676 /// Although the MachineScheduler is currently constrained to single blocks,
1677 /// this algorithm should handle extended blocks. An EBB is a set of
1678 /// contiguously numbered blocks such that the previous block in the EBB is
1679 /// always the single predecessor.
1680 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1681  LiveIntervals *LIS = DAG->getLIS();
1682  MachineInstr *Copy = CopySU->getInstr();
1683 
1684  // Check for pure vreg copies.
1685  const MachineOperand &SrcOp = Copy->getOperand(1);
1686  unsigned SrcReg = SrcOp.getReg();
1687  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1688  return;
1689 
1690  const MachineOperand &DstOp = Copy->getOperand(0);
1691  unsigned DstReg = DstOp.getReg();
1692  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1693  return;
1694 
1695  // Check if either the dest or source is local. If it's live across a back
1696  // edge, it's not local. Note that if both vregs are live across the back
1697  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1698  // If both the copy's source and dest are local live intervals, then we
1699  // should treat the dest as the global for the purpose of adding
1700  // constraints. This adds edges from source's other uses to the copy.
1701  unsigned LocalReg = SrcReg;
1702  unsigned GlobalReg = DstReg;
1703  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1704  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1705  LocalReg = DstReg;
1706  GlobalReg = SrcReg;
1707  LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1709  return;
1710  }
1711  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1712 
1713  // Find the global segment after the start of the local LI.
1714  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1715  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1716  // local live range. We could create edges from other global uses to the local
1717  // start, but the coalescer should have already eliminated these cases, so
1718  // don't bother dealing with it.
1719  if (GlobalSegment == GlobalLI->end())
1720  return;
1721 
1722  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1723  // returned the next global segment. But if GlobalSegment overlaps with
1724  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1725  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1726  if (GlobalSegment->contains(LocalLI->beginIndex()))
1727  ++GlobalSegment;
1728 
1729  if (GlobalSegment == GlobalLI->end())
1730  return;
1731 
1732  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1733  if (GlobalSegment != GlobalLI->begin()) {
1734  // Two address defs have no hole.
1735  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1736  GlobalSegment->start)) {
1737  return;
1738  }
1739  // If the prior global segment may be defined by the same two-address
1740  // instruction that also defines LocalLI, then we can't make a hole here.
1741  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1742  LocalLI->beginIndex())) {
1743  return;
1744  }
1745  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1746  // it would be a disconnected component in the live range.
1747  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1748  "Disconnected LRG within the scheduling region.");
1749  }
1750  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1751  if (!GlobalDef)
1752  return;
1753 
1754  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1755  if (!GlobalSU)
1756  return;
1757 
1758  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1759  // constraining the uses of the last local def to precede GlobalDef.
1760  SmallVector<SUnit*,8> LocalUses;
1761  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1762  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1763  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1764  for (const SDep &Succ : LastLocalSU->Succs) {
1765  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1766  continue;
1767  if (Succ.getSUnit() == GlobalSU)
1768  continue;
1769  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1770  return;
1771  LocalUses.push_back(Succ.getSUnit());
1772  }
1773  // Open the top of the GlobalLI hole by constraining any earlier global uses
1774  // to precede the start of LocalLI.
1775  SmallVector<SUnit*,8> GlobalUses;
1776  MachineInstr *FirstLocalDef =
1777  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1778  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1779  for (const SDep &Pred : GlobalSU->Preds) {
1780  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1781  continue;
1782  if (Pred.getSUnit() == FirstLocalSU)
1783  continue;
1784  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1785  return;
1786  GlobalUses.push_back(Pred.getSUnit());
1787  }
1788  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1789  // Add the weak edges.
1790  for (SmallVectorImpl<SUnit*>::const_iterator
1791  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1792  DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1793  << GlobalSU->NodeNum << ")\n");
1794  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1795  }
1796  for (SmallVectorImpl<SUnit*>::const_iterator
1797  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1798  DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1799  << FirstLocalSU->NodeNum << ")\n");
1800  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1801  }
1802 }
1803 
1804 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1805 /// copy elimination.
1806 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1807  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1808  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1809 
1810  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1811  if (FirstPos == DAG->end())
1812  return;
1813  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1814  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1815  *priorNonDebug(DAG->end(), DAG->begin()));
1816 
1817  for (SUnit &SU : DAG->SUnits) {
1818  if (!SU.getInstr()->isCopy())
1819  continue;
1820 
1821  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1822  }
1823 }
1824 
1825 //===----------------------------------------------------------------------===//
1826 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1827 // and possibly other custom schedulers.
1828 //===----------------------------------------------------------------------===//
1829 
1830 static const unsigned InvalidCycle = ~0U;
1831 
1832 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1833 
1834 /// Given a Count of resource usage and a Latency value, return true if a
1835 /// SchedBoundary becomes resource limited.
1836 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1837  unsigned Latency) {
1838  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1839 }
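// [Editor's note, not in the original source] Worked example of the check
// above, with assumed numbers: given LFactor = 2, Count = 10 and Latency = 3,
// the expression is 10 - 3*2 = 4 > 2, so the zone counts as resource limited;
// with Count = 7 it is 7 - 6 = 1 <= 2 and latency dominates instead.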
1840 
1841 void SchedBoundary::reset() {
1842  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1843  // Destroying and reconstructing it is very expensive though. So keep
1844  // invalid, placeholder HazardRecs.
1845  if (HazardRec && HazardRec->isEnabled()) {
1846  delete HazardRec;
1847  HazardRec = nullptr;
1848  }
1849  Available.clear();
1850  Pending.clear();
1851  CheckPending = false;
1852  CurrCycle = 0;
1853  CurrMOps = 0;
1854  MinReadyCycle = std::numeric_limits<unsigned>::max();
1855  ExpectedLatency = 0;
1856  DependentLatency = 0;
1857  RetiredMOps = 0;
1858  MaxExecutedResCount = 0;
1859  ZoneCritResIdx = 0;
1860  IsResourceLimited = false;
1861  ReservedCycles.clear();
1862 #ifndef NDEBUG
1863  // Track the maximum number of stall cycles that could arise either from the
1864  // latency of a DAG edge or the number of cycles that a processor resource is
1865  // reserved (SchedBoundary::ReservedCycles).
1866  MaxObservedStall = 0;
1867 #endif
1868  // Reserve a zero-count for invalid CritResIdx.
1869  ExecutedResCounts.resize(1);
1870  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1871 }
1872 
1873 void SchedRemainder::
1874 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1875  reset();
1876  if (!SchedModel->hasInstrSchedModel())
1877  return;
1878  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1879  for (SUnit &SU : DAG->SUnits) {
1880  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1881  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1882  * SchedModel->getMicroOpFactor();
1883  for (TargetSchedModel::ProcResIter
1884  PI = SchedModel->getWriteProcResBegin(SC),
1885  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1886  unsigned PIdx = PI->ProcResourceIdx;
1887  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1888  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1889  }
1890  }
1891 }
1892 
1893 void SchedBoundary::
1894 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1895  reset();
1896  DAG = dag;
1897  SchedModel = smodel;
1898  Rem = rem;
1899  if (SchedModel->hasInstrSchedModel()) {
1900  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1901  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1902  }
1903 }
1904 
1905 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1906 /// these "soft stalls" differently than the hard stall cycles based on CPU
1907 /// resources and computed by checkHazard(). A fully in-order model
1908 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1909 /// available for scheduling until they are ready. However, a weaker in-order
1910 /// model may use this for heuristics. For example, if a processor has in-order
1911 /// behavior when reading certain resources, this may come into play.
1912 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1913  if (!SU->isUnbuffered)
1914  return 0;
1915 
1916  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1917  if (ReadyCycle > CurrCycle)
1918  return ReadyCycle - CurrCycle;
1919  return 0;
1920 }
1921 
1922 /// Compute the next cycle at which the given processor resource can be
1923 /// scheduled.
1924 unsigned SchedBoundary::
1925 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1926  unsigned NextUnreserved = ReservedCycles[PIdx];
1927  // If this resource has never been used, always return cycle zero.
1928  if (NextUnreserved == InvalidCycle)
1929  return 0;
1930  // For bottom-up scheduling add the cycles needed for the current operation.
1931  if (!isTop())
1932  NextUnreserved += Cycles;
1933  return NextUnreserved;
1934 }
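// [Editor's note, not in the original source] Illustrative example of the
// direction handling above: if ReservedCycles[PIdx] is 5, a top-down zone may
// next use the resource at cycle 5, while a bottom-up zone also adds the
// current operation's occupancy (e.g. 5 + 2 = 7), since the instruction being
// placed executes before everything already scheduled below it.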
1935 
1936 /// Does this SU have a hazard within the current instruction group.
1937 ///
1938 /// The scheduler supports two modes of hazard recognition. The first is the
1939 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1940 /// supports highly complicated in-order reservation tables
1941 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1942 ///
1943 /// The second is a streamlined mechanism that checks for hazards based on
1944 /// simple counters that the scheduler itself maintains. It explicitly checks
1945 /// for instruction dispatch limitations, including the number of micro-ops that
1946 /// can dispatch per cycle.
1947 ///
1948 /// TODO: Also check whether the SU must start a new group.
1949 bool SchedBoundary::checkHazard(SUnit *SU) {
1950  if (HazardRec->isEnabled()
1951  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1952  return true;
1953  }
1954 
1955  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1956  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1957  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1958  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1959  return true;
1960  }
1961 
1962  if (CurrMOps > 0 &&
1963  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1964  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1965  DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1966  << (isTop()? "begin" : "end") << " group\n");
1967  return true;
1968  }
1969 
1970  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1971  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1972  for (const MCWriteProcResEntry &PE :
1973  make_range(SchedModel->getWriteProcResBegin(SC),
1974  SchedModel->getWriteProcResEnd(SC))) {
1975  unsigned ResIdx = PE.ProcResourceIdx;
1976  unsigned Cycles = PE.Cycles;
1977  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1978  if (NRCycle > CurrCycle) {
1979 #ifndef NDEBUG
1980  MaxObservedStall = std::max(Cycles, MaxObservedStall);
1981 #endif
1982  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1983  << SchedModel->getResourceName(ResIdx)
1984  << "=" << NRCycle << "c\n");
1985  return true;
1986  }
1987  }
1988  }
1989  return false;
1990 }
1991 
1992 // Find the unscheduled node in ReadySUs with the highest latency.
1993 unsigned SchedBoundary::
1994 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1995  SUnit *LateSU = nullptr;
1996  unsigned RemLatency = 0;
1997  for (SUnit *SU : ReadySUs) {
1998  unsigned L = getUnscheduledLatency(SU);
1999  if (L > RemLatency) {
2000  RemLatency = L;
2001  LateSU = SU;
2002  }
2003  }
2004  if (LateSU) {
2005  DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2006  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2007  }
2008  return RemLatency;
2009 }
2010 
2011 // Count resources in this zone and the remaining unscheduled
2012 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2013 // resource index, or zero if the zone is issue limited.
2014 unsigned SchedBoundary::
2015 getOtherResourceCount(unsigned &OtherCritIdx) {
2016  OtherCritIdx = 0;
2017  if (!SchedModel->hasInstrSchedModel())
2018  return 0;
2019 
2020  unsigned OtherCritCount = Rem->RemIssueCount
2021  + (RetiredMOps * SchedModel->getMicroOpFactor());
2022  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2023  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2024  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2025  PIdx != PEnd; ++PIdx) {
2026  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2027  if (OtherCount > OtherCritCount) {
2028  OtherCritCount = OtherCount;
2029  OtherCritIdx = PIdx;
2030  }
2031  }
2032  if (OtherCritIdx) {
2033  DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
2034  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2035  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2036  }
2037  return OtherCritCount;
2038 }
2039 
2040 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2041  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2042 
2043 #ifndef NDEBUG
2044  // ReadyCycle was bumped up to the CurrCycle when this node was
2045  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2046  // scheduling, so may now be greater than ReadyCycle.
2047  if (ReadyCycle > CurrCycle)
2048  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2049 #endif
2050 
2051  if (ReadyCycle < MinReadyCycle)
2052  MinReadyCycle = ReadyCycle;
2053 
2054  // Check for interlocks first. For the purpose of other heuristics, an
2055  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2056  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2057  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2058  Available.size() >= ReadyListLimit)
2059  Pending.push(SU);
2060  else
2061  Available.push(SU);
2062 }
2063 
2064 /// Move the boundary of scheduled code by one cycle.
2065 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2066  if (SchedModel->getMicroOpBufferSize() == 0) {
2067  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2068  "MinReadyCycle uninitialized");
2069  if (MinReadyCycle > NextCycle)
2070  NextCycle = MinReadyCycle;
2071  }
2072  // Update the current micro-ops, which will issue in the next cycle.
2073  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2074  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2075 
2076  // Decrement DependentLatency based on the next cycle.
2077  if ((NextCycle - CurrCycle) > DependentLatency)
2078  DependentLatency = 0;
2079  else
2080  DependentLatency -= (NextCycle - CurrCycle);
2081 
2082  if (!HazardRec->isEnabled()) {
2083  // Bypass HazardRec virtual calls.
2084  CurrCycle = NextCycle;
2085  } else {
2086  // Bypass getHazardType calls in case of long latency.
2087  for (; CurrCycle != NextCycle; ++CurrCycle) {
2088  if (isTop())
2089  HazardRec->AdvanceCycle();
2090  else
2091  HazardRec->RecedeCycle();
2092  }
2093  }
2094  CheckPending = true;
2095  IsResourceLimited =
2096  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2097  getScheduledLatency());
2098 
2099  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2100 }
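// [Editor's note, not in the original source] Illustrative arithmetic for the
// micro-op bookkeeping above: with an issue width of 4, advancing two cycles
// gives DecMOps = 4*2 = 8, so up to 8 pending micro-ops are retired from
// CurrMOps (clamped at zero) before issuing resumes in the new cycle.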
2101 
2102 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2103  ExecutedResCounts[PIdx] += Count;
2104  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2105  MaxExecutedResCount = ExecutedResCounts[PIdx];
2106 }
2107 
2108 /// Add the given processor resource to this scheduled zone.
2109 ///
2110 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2111 /// during which this resource is consumed.
2112 ///
2113 /// \return the next cycle at which the instruction may execute without
2114 /// oversubscribing resources.
2115 unsigned SchedBoundary::
2116 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2117  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2118  unsigned Count = Factor * Cycles;
2119  DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
2120  << " +" << Cycles << "x" << Factor << "u\n");
2121 
2122  // Update Executed resources counts.
2123  incExecutedResources(PIdx, Count);
2124  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2125  Rem->RemainingCounts[PIdx] -= Count;
2126 
2127  // Check if this resource exceeds the current critical resource. If so, it
2128  // becomes the critical resource.
2129  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2130  ZoneCritResIdx = PIdx;
2131  DEBUG(dbgs() << " *** Critical resource "
2132  << SchedModel->getResourceName(PIdx) << ": "
2133  << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2134  }
2135  // For reserved resources, record the highest cycle using the resource.
2136  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2137  if (NextAvailable > CurrCycle) {
2138  DEBUG(dbgs() << " Resource conflict: "
2139  << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2140  << NextAvailable << "\n");
2141  }
2142  return NextAvailable;
2143 }
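// [Editor's note, not in the original source] The scaled units above make
// different resources comparable. As an assumed example, if a single-unit
// resource has factor 2 and a dual-unit resource has factor 1, occupying the
// single unit for 3 cycles adds 3*2 = 6 scaled units, the same pressure as
// 6 cycles spread across the duplicated resource.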
2144 
2145 /// Move the boundary of scheduled code by one SUnit.
2146 void SchedBoundary::bumpNode(SUnit *SU) {
2147  // Update the reservation table.
2148  if (HazardRec->isEnabled()) {
2149  if (!isTop() && SU->isCall) {
2150  // Calls are scheduled with their preceding instructions. For bottom-up
2151  // scheduling, clear the pipeline state before emitting.
2152  HazardRec->Reset();
2153  }
2154  HazardRec->EmitInstruction(SU);
2155  }
2156  // checkHazard should prevent scheduling multiple instructions per cycle that
2157  // exceed the issue width.
2158  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2159  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2160  assert(
2161  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2162  "Cannot schedule this instruction's MicroOps in the current cycle.");
2163 
2164  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2165  DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2166 
2167  unsigned NextCycle = CurrCycle;
2168  switch (SchedModel->getMicroOpBufferSize()) {
2169  case 0:
2170  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2171  break;
2172  case 1:
2173  if (ReadyCycle > NextCycle) {
2174  NextCycle = ReadyCycle;
2175  DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2176  }
2177  break;
2178  default:
2179  // We don't currently model the OOO reorder buffer, so consider all
2180  // scheduled MOps to be "retired". We do loosely model in-order resource
2181  // latency. If this instruction uses an in-order resource, account for any
2182  // likely stall cycles.
2183  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2184  NextCycle = ReadyCycle;
2185  break;
2186  }
2187  RetiredMOps += IncMOps;
2188 
2189  // Update resource counts and critical resource.
2190  if (SchedModel->hasInstrSchedModel()) {
2191  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2192  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2193  Rem->RemIssueCount -= DecRemIssue;
2194  if (ZoneCritResIdx) {
2195  // Scale scheduled micro-ops for comparing with the critical resource.
2196  unsigned ScaledMOps =
2197  RetiredMOps * SchedModel->getMicroOpFactor();
2198 
2199  // If scaled micro-ops are now more than the previous critical resource by
2200  // a full cycle, then micro-ops issue becomes critical.
2201  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2202  >= (int)SchedModel->getLatencyFactor()) {
2203  ZoneCritResIdx = 0;
2204  DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2205  << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2206  }
2207  }
2208  for (TargetSchedModel::ProcResIter
2209  PI = SchedModel->getWriteProcResBegin(SC),
2210  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2211  unsigned RCycle =
2212  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2213  if (RCycle > NextCycle)
2214  NextCycle = RCycle;
2215  }
2216  if (SU->hasReservedResource) {
2217  // For reserved resources, record the highest cycle using the resource.
2218  // For top-down scheduling, this is the cycle in which we schedule this
2219  // instruction plus the number of cycles the operation reserves the
2220  // resource. For bottom-up, it is simply the instruction's cycle.
2221  for (TargetSchedModel::ProcResIter
2222  PI = SchedModel->getWriteProcResBegin(SC),
2223  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2224  unsigned PIdx = PI->ProcResourceIdx;
2225  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2226  if (isTop()) {
2227  ReservedCycles[PIdx] =
2228  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2229  }
2230  else
2231  ReservedCycles[PIdx] = NextCycle;
2232  }
2233  }
2234  }
2235  }
2236  // Update ExpectedLatency and DependentLatency.
2237  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2238  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2239  if (SU->getDepth() > TopLatency) {
2240  TopLatency = SU->getDepth();
2241  DEBUG(dbgs() << " " << Available.getName()
2242  << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2243  }
2244  if (SU->getHeight() > BotLatency) {
2245  BotLatency = SU->getHeight();
2246  DEBUG(dbgs() << " " << Available.getName()
2247  << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2248  }
2249  // If we stall for any reason, bump the cycle.
2250  if (NextCycle > CurrCycle)
2251  bumpCycle(NextCycle);
2252  else
2253  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2254  // resource limited. If a stall occurred, bumpCycle does this.
2255  IsResourceLimited =
2256  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2257  getScheduledLatency());
2258 
2259  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2260  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2261  // one cycle. Since we commonly reach the max MOps here, opportunistically
2262  // bump the cycle to avoid uselessly checking everything in the readyQ.
2263  CurrMOps += IncMOps;
2264 
2265  // Bump the cycle count for issue group constraints.
2266  // This must be done after NextCycle has been adjusted for all other stalls.
2267  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2268  // currCycle to X.
2269  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2270  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2271  DEBUG(dbgs() << " Bump cycle to "
2272  << (isTop() ? "end" : "begin") << " group\n");
2273  bumpCycle(++NextCycle);
2274  }
2275 
2276  while (CurrMOps >= SchedModel->getIssueWidth()) {
2277  DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2278  << " at cycle " << CurrCycle << '\n');
2279  bumpCycle(++NextCycle);
2280  }
2281  DEBUG(dumpScheduledState());
2282 }
2283 
2284 /// Release pending ready nodes into the available queue. This makes them
2285 /// visible to heuristics.
2286 void SchedBoundary::releasePending() {
2287  // If the available queue is empty, it is safe to reset MinReadyCycle.
2288  if (Available.empty())
2289  MinReadyCycle = std::numeric_limits<unsigned>::max();
2290 
2291  // Check to see if any of the pending instructions are ready to issue. If
2292  // so, add them to the available queue.
2293  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2294  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2295  SUnit *SU = *(Pending.begin()+i);
2296  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2297 
2298  if (ReadyCycle < MinReadyCycle)
2299  MinReadyCycle = ReadyCycle;
2300 
2301  if (!IsBuffered && ReadyCycle > CurrCycle)
2302  continue;
2303 
2304  if (checkHazard(SU))
2305  continue;
2306 
2307  if (Available.size() >= ReadyListLimit)
2308  break;
2309 
2310  Available.push(SU);
2311  Pending.remove(Pending.begin()+i);
2312  --i; --e;
2313  }
2314  CheckPending = false;
2315 }
2316 
2317 /// Remove SU from the ready set for this boundary.
2318 void SchedBoundary::removeReady(SUnit *SU) {
2319  if (Available.isInQueue(SU))
2320  Available.remove(Available.find(SU));
2321  else {
2322  assert(Pending.isInQueue(SU) && "bad ready count");
2323  Pending.remove(Pending.find(SU));
2324  }
2325 }
2326 
2327 /// If this queue only has one ready candidate, return it. As a side effect,
2328 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2329 /// one node is ready. If multiple instructions are ready, return NULL.
2330 SUnit *SchedBoundary::pickOnlyChoice() {
2331  if (CheckPending)
2332  releasePending();
2333 
2334  if (CurrMOps > 0) {
2335  // Defer any ready instrs that now have a hazard.
2336  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2337  if (checkHazard(*I)) {
2338  Pending.push(*I);
2339  I = Available.remove(I);
2340  continue;
2341  }
2342  ++I;
2343  }
2344  }
2345  for (unsigned i = 0; Available.empty(); ++i) {
2346 // FIXME: Re-enable assert once PR20057 is resolved.
2347 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2348 // "permanent hazard");
2349  (void)i;
2350  bumpCycle(CurrCycle + 1);
2351  releasePending();
2352  }
2353 
2354  DEBUG(Pending.dump());
2355  DEBUG(Available.dump());
2356 
2357  if (Available.size() == 1)
2358  return *Available.begin();
2359  return nullptr;
2360 }
2361 
2362 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2363 // This is useful information to dump after bumpNode.
2364 // Note that the Queue contents are more useful before pickNodeFromQueue.
2365 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2366  unsigned ResFactor;
2367  unsigned ResCount;
2368  if (ZoneCritResIdx) {
2369  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2370  ResCount = getResourceCount(ZoneCritResIdx);
2371  } else {
2372  ResFactor = SchedModel->getMicroOpFactor();
2373  ResCount = RetiredMOps * ResFactor;
2374  }
2375  unsigned LFactor = SchedModel->getLatencyFactor();
2376  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2377  << " Retired: " << RetiredMOps;
2378  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2379  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2380  << ResCount / ResFactor << " "
2381  << SchedModel->getResourceName(ZoneCritResIdx)
2382  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2383  << (IsResourceLimited ? " - Resource" : " - Latency")
2384  << " limited.\n";
2385 }
2386 #endif
2387 
2388 //===----------------------------------------------------------------------===//
2389 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2390 //===----------------------------------------------------------------------===//
2391 
2392 void GenericSchedulerBase::SchedCandidate::
2393 initResourceDelta(const ScheduleDAGMI *DAG,
2394  const TargetSchedModel *SchedModel) {
2395  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2396  return;
2397 
2398  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2399  for (TargetSchedModel::ProcResIter
2400  PI = SchedModel->getWriteProcResBegin(SC),
2401  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2402  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2403  ResDelta.CritResources += PI->Cycles;
2404  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2405  ResDelta.DemandedResources += PI->Cycles;
2406  }
2407 }
2408 
2409 /// Set the CandPolicy for a scheduling zone given the current resources and
2410 /// latencies inside and outside the zone.
2411 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2412  SchedBoundary &CurrZone,
2413  SchedBoundary *OtherZone) {
2414  // Apply preemptive heuristics based on the total latency and resources
2415  // inside and outside this zone. Potential stalls should be considered before
2416  // following this policy.
2417 
2418  // Compute remaining latency. We need this both to determine whether the
2419  // overall schedule has become latency-limited and whether the instructions
2420  // outside this zone are resource or latency limited.
2421  //
2422  // The "dependent" latency is updated incrementally during scheduling as the
2423  // max height/depth of scheduled nodes minus the cycles since it was
2424  // scheduled:
2425  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2426  //
2427  // The "independent" latency is the max ready queue depth:
2428  // ILat = max N.depth for N in Available|Pending
2429  //
2430  // RemainingLatency is the greater of independent and dependent latency.
2431  unsigned RemLatency = CurrZone.getDependentLatency();
2432  RemLatency = std::max(RemLatency,
2433  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2434  RemLatency = std::max(RemLatency,
2435  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2436 
2437  // Compute the critical resource outside the zone.
2438  unsigned OtherCritIdx = 0;
2439  unsigned OtherCount =
2440  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2441 
2442  bool OtherResLimited = false;
2443  if (SchedModel->hasInstrSchedModel())
2444  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2445  OtherCount, RemLatency);
2446 
2447  // Schedule aggressively for latency in PostRA mode. We don't check for
2448  // acyclic latency during PostRA, and highly out-of-order processors will
2449  // skip PostRA scheduling.
2450  if (!OtherResLimited) {
2451  if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2452  Policy.ReduceLatency |= true;
2453  DEBUG(dbgs() << " " << CurrZone.Available.getName()
2454  << " RemainingLatency " << RemLatency << " + "
2455  << CurrZone.getCurrCycle() << "c > CritPath "
2456  << Rem.CriticalPath << "\n");
2457  }
2458  }
2459  // If the same resource is limiting inside and outside the zone, do nothing.
2460  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2461  return;
2462 
2463  DEBUG(
2464  if (CurrZone.isResourceLimited()) {
2465  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2466  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2467  << "\n";
2468  }
2469  if (OtherResLimited)
2470  dbgs() << " RemainingLimit: "
2471  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2472  if (!CurrZone.isResourceLimited() && !OtherResLimited)
2473  dbgs() << " Latency limited both directions.\n");
2474 
2475  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2476  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2477 
2478  if (OtherResLimited)
2479  Policy.DemandResIdx = OtherCritIdx;
2480 }
2481 
2482 #ifndef NDEBUG
2483 const char *GenericSchedulerBase::getReasonStr(
2484  GenericSchedulerBase::CandReason Reason) {
2485  switch (Reason) {
2486  case NoCand: return "NOCAND ";
2487  case Only1: return "ONLY1 ";
2488  case PhysRegCopy: return "PREG-COPY ";
2489  case RegExcess: return "REG-EXCESS";
2490  case RegCritical: return "REG-CRIT ";
2491  case Stall: return "STALL ";
2492  case Cluster: return "CLUSTER ";
2493  case Weak: return "WEAK ";
2494  case RegMax: return "REG-MAX ";
2495  case ResourceReduce: return "RES-REDUCE";
2496  case ResourceDemand: return "RES-DEMAND";
2497  case TopDepthReduce: return "TOP-DEPTH ";
2498  case TopPathReduce: return "TOP-PATH ";
2499  case BotHeightReduce:return "BOT-HEIGHT";
2500  case BotPathReduce: return "BOT-PATH ";
2501  case NextDefUse: return "DEF-USE ";
2502  case NodeOrder: return "ORDER ";
2503  };
2504  llvm_unreachable("Unknown reason!");
2505 }
2506 
2507 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2508  PressureChange P;
2509  unsigned ResIdx = 0;
2510  unsigned Latency = 0;
2511  switch (Cand.Reason) {
2512  default:
2513  break;
2514  case RegExcess:
2515  P = Cand.RPDelta.Excess;
2516  break;
2517  case RegCritical:
2518  P = Cand.RPDelta.CriticalMax;
2519  break;
2520  case RegMax:
2521  P = Cand.RPDelta.CurrentMax;
2522  break;
2523  case ResourceReduce:
2524  ResIdx = Cand.Policy.ReduceResIdx;
2525  break;
2526  case ResourceDemand:
2527  ResIdx = Cand.Policy.DemandResIdx;
2528  break;
2529  case TopDepthReduce:
2530  Latency = Cand.SU->getDepth();
2531  break;
2532  case TopPathReduce:
2533  Latency = Cand.SU->getHeight();
2534  break;
2535  case BotHeightReduce:
2536  Latency = Cand.SU->getHeight();
2537  break;
2538  case BotPathReduce:
2539  Latency = Cand.SU->getDepth();
2540  break;
2541  }
2542  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2543  if (P.isValid())
2544  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2545  << ":" << P.getUnitInc() << " ";
2546  else
2547  dbgs() << " ";
2548  if (ResIdx)
2549  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2550  else
2551  dbgs() << " ";
2552  if (Latency)
2553  dbgs() << " " << Latency << " cycles ";
2554  else
2555  dbgs() << " ";
2556  dbgs() << '\n';
2557 }
2558 #endif
2559 
2560 /// Return true if this heuristic determines order.
2561 static bool tryLess(int TryVal, int CandVal,
2562  GenericSchedulerBase::SchedCandidate &TryCand,
2563  GenericSchedulerBase::SchedCandidate &Cand,
2564  GenericSchedulerBase::CandReason Reason) {
2565  if (TryVal < CandVal) {
2566  TryCand.Reason = Reason;
2567  return true;
2568  }
2569  if (TryVal > CandVal) {
2570  if (Cand.Reason > Reason)
2571  Cand.Reason = Reason;
2572  return true;
2573  }
2574  return false;
2575 }
2576 
2577 static bool tryGreater(int TryVal, int CandVal,
2578  GenericSchedulerBase::SchedCandidate &TryCand,
2579  GenericSchedulerBase::SchedCandidate &Cand,
2580  GenericSchedulerBase::CandReason Reason) {
2581  if (TryVal > CandVal) {
2582  TryCand.Reason = Reason;
2583  return true;
2584  }
2585  if (TryVal < CandVal) {
2586  if (Cand.Reason > Reason)
2587  Cand.Reason = Reason;
2588  return true;
2589  }
2590  return false;
2591 }
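// [Editor's note, not in the original source] tryLess/tryGreater implement one
// step of the hierarchical comparison: they return true as soon as the metric
// distinguishes the candidates. If TryCand wins it takes the given Reason; if
// Cand wins, Cand.Reason is only updated when the new Reason is stronger
// (numerically smaller). A false return means a tie, and the caller falls
// through to the next heuristic.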
2592 
2593 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2594  GenericSchedulerBase::SchedCandidate &Cand,
2595  SchedBoundary &Zone) {
2596  if (Zone.isTop()) {
2597  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2598  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2599  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2600  return true;
2601  }
2602  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2603  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2604  return true;
2605  } else {
2606  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2607  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2608  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2609  return true;
2610  }
2611  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2612  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2613  return true;
2614  }
2615  return false;
2616 }
2617 
2618 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2619  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2620  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2621 }
2622 
2623 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2624  tracePick(Cand.Reason, Cand.AtTop);
2625 }
2626 
2627 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2628  assert(dag->hasVRegLiveness() &&
2629  "(PreRA)GenericScheduler needs vreg liveness");
2630  DAG = static_cast<ScheduleDAGMILive*>(dag);
2631  SchedModel = DAG->getSchedModel();
2632  TRI = DAG->TRI;
2633 
2634  Rem.init(DAG, SchedModel);
2635  Top.init(DAG, SchedModel, &Rem);
2636  Bot.init(DAG, SchedModel, &Rem);
2637 
2638  // Initialize resource counts.
2639 
2640  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2641  // are disabled, then these HazardRecs will be disabled.
2642  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2643  if (!Top.HazardRec) {
2644  Top.HazardRec =
2645  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2646  Itin, DAG);
2647  }
2648  if (!Bot.HazardRec) {
2649  Bot.HazardRec =
2650  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2651  Itin, DAG);
2652  }
2653  TopCand.SU = nullptr;
2654  BotCand.SU = nullptr;
2655 }
2656 
2657 /// Initialize the per-region scheduling policy.
2658 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2659  MachineBasicBlock::iterator End,
2660  unsigned NumRegionInstrs) {
2661  const MachineFunction &MF = *Begin->getMF();
2662  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2663 
2664  // Avoid setting up the register pressure tracker for small regions to save
2665  // compile time. As a rough heuristic, only track pressure when the number of
2666  // schedulable instructions exceeds half the integer register file.
2667  RegionPolicy.ShouldTrackPressure = true;
2668  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2669  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2670  if (TLI->isTypeLegal(LegalIntVT)) {
2671  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2672  TLI->getRegClassFor(LegalIntVT));
2673  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2674  }
2675  }
2676 
2677  // For generic targets, we default to bottom-up, because it's simpler and more
2678  // compile-time optimizations have been implemented in that direction.
2679  RegionPolicy.OnlyBottomUp = true;
2680 
2681  // Allow the subtarget to override default policy.
2682  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2683 
2684  // After subtarget overrides, apply command line options.
2685  if (!EnableRegPressure)
2686  RegionPolicy.ShouldTrackPressure = false;
2687 
2688  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2689  // e.g. -misched-bottomup=false allows scheduling in both directions.
2690  assert((!ForceTopDown || !ForceBottomUp) &&
2691  "-misched-topdown incompatible with -misched-bottomup");
2692  if (ForceBottomUp.getNumOccurrences() > 0) {
2693  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2694  if (RegionPolicy.OnlyBottomUp)
2695  RegionPolicy.OnlyTopDown = false;
2696  }
2697  if (ForceTopDown.getNumOccurrences() > 0) {
2698  RegionPolicy.OnlyTopDown = ForceTopDown;
2699  if (RegionPolicy.OnlyTopDown)
2700  RegionPolicy.OnlyBottomUp = false;
2701  }
2702 }
2703 
2704 void GenericScheduler::dumpPolicy() const {
2705  // Cannot completely remove virtual function even in release mode.
2706 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2707  dbgs() << "GenericScheduler RegionPolicy: "
2708  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2709  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2710  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2711  << "\n";
2712 #endif
2713 }
2714 
2715 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2716 /// critical path by more cycles than it takes to drain the instruction buffer.
2717 /// We estimate an upper bound on in-flight instructions as:
2718 ///
2719 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2720 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2721 /// InFlightResources = InFlightIterations * LoopResources
2722 ///
2723 /// TODO: Check execution resources in addition to IssueCount.
2724 void GenericScheduler::checkAcyclicLatency() {
2725  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2726  return;
2727 
2728  // Scaled number of cycles per loop iteration.
2729  unsigned IterCount =
2730  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2731  Rem.RemIssueCount);
2732  // Scaled acyclic critical path.
2733  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2734  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2735  unsigned InFlightCount =
2736  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2737  unsigned BufferLimit =
2738  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2739 
2740  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2741 
2742  DEBUG(dbgs() << "IssueCycles="
2743  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2744  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2745  << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2746  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2747  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2748  if (Rem.IsAcyclicLatencyLimited)
2749  dbgs() << " ACYCLIC LATENCY LIMIT\n");
2750 }
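// [Editor's note, not in the original source] Worked example with assumed
// values and latency/micro-op factors of 1: CyclicCritPath = 10,
// CriticalPath = 40, RemIssueCount = 20 and a 32-entry micro-op buffer give
// IterCount = max(10, 20) = 20 and InFlightCount = (40*20 + 19) / 20 = 40,
// which exceeds BufferLimit = 32, so the region is acyclic latency limited.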
2751 
2752 void GenericScheduler::registerRoots() {
2753  Rem.CriticalPath = DAG->ExitSU.getDepth();
2754 
2755  // Some roots may not feed into ExitSU. Check all of them in case.
2756  for (const SUnit *SU : Bot.Available) {
2757  if (SU->getDepth() > Rem.CriticalPath)
2758  Rem.CriticalPath = SU->getDepth();
2759  }
2760  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2761  if (DumpCriticalPathLength) {
2762  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2763  }
2764 
2765  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2766  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2767  checkAcyclicLatency();
2768  }
2769 }
2770 
2771 static bool tryPressure(const PressureChange &TryP,
2772  const PressureChange &CandP,
2773  GenericSchedulerBase::SchedCandidate &TryCand,
2774  GenericSchedulerBase::SchedCandidate &Cand,
2775  GenericSchedulerBase::CandReason Reason,
2776  const TargetRegisterInfo *TRI,
2777  const MachineFunction &MF) {
2778  // If one candidate decreases and the other increases, go with it.
2779  // Invalid candidates have UnitInc==0.
2780  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2781  Reason)) {
2782  return true;
2783  }
2784  // Do not compare the magnitude of pressure changes between top and bottom
2785  // boundary.
2786  if (Cand.AtTop != TryCand.AtTop)
2787  return false;
2788 
2789  // If both candidates affect the same set in the same boundary, go with the
2790  // smallest increase.
2791  unsigned TryPSet = TryP.getPSetOrMax();
2792  unsigned CandPSet = CandP.getPSetOrMax();
2793  if (TryPSet == CandPSet) {
2794  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2795  Reason);
2796  }
2797 
2798  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2799  std::numeric_limits<int>::max();
2800 
2801  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2802  std::numeric_limits<int>::max();
2803 
2804  // If the candidates are decreasing pressure, reverse priority.
2805  if (TryP.getUnitInc() < 0)
2806  std::swap(TryRank, CandRank);
2807  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2808 }
2809 
2810 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2811  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2812 }
2813 
2814 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2815 /// their physreg def/use.
2816 ///
2817 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2818 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2819 /// with the operation that produces or consumes the physreg. We'll do this when
2820 /// regalloc has support for parallel copies.
2821 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2822  const MachineInstr *MI = SU->getInstr();
2823  if (!MI->isCopy())
2824  return 0;
2825 
2826  unsigned ScheduledOper = isTop ? 1 : 0;
2827  unsigned UnscheduledOper = isTop ? 0 : 1;
2828  // If we have already scheduled the physreg produce/consumer, immediately
2829  // schedule the copy.
2830  if (TargetRegisterInfo::isPhysicalRegister(
2831  MI->getOperand(ScheduledOper).getReg()))
2832  return 1;
2833  // If the physreg is at the boundary, defer it. Otherwise schedule it
2834  // immediately to free the dependent. We can hoist the copy later.
2835  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2836  if (TargetRegisterInfo::isPhysicalRegister(
2837  MI->getOperand(UnscheduledOper).getReg()))
2838  return AtBoundary ? -1 : 1;
2839  return 0;
2840 }
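// [Editor's note, not in the original source] Example: for "%vreg = COPY
// %physreg" scheduled top-down, operand 1 (the physreg source) lies on the
// already scheduled side, so the function returns 1 and the copy is pulled up
// next to the instruction defining the physreg, keeping the physical
// register's live range short.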
2841 
2842 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2843  bool AtTop,
2844  const RegPressureTracker &RPTracker,
2845  RegPressureTracker &TempTracker) {
2846  Cand.SU = SU;
2847  Cand.AtTop = AtTop;
2848  if (DAG->isTrackingPressure()) {
2849  if (AtTop) {
2850  TempTracker.getMaxDownwardPressureDelta(
2851  Cand.SU->getInstr(),
2852  Cand.RPDelta,
2853  DAG->getRegionCriticalPSets(),
2854  DAG->getRegPressure().MaxSetPressure);
2855  } else {
2856  if (VerifyScheduling) {
2857  TempTracker.getMaxUpwardPressureDelta(
2858  Cand.SU->getInstr(),
2859  &DAG->getPressureDiff(Cand.SU),
2860  Cand.RPDelta,
2861  DAG->getRegionCriticalPSets(),
2862  DAG->getRegPressure().MaxSetPressure);
2863  } else {
2864  RPTracker.getUpwardPressureDelta(
2865  Cand.SU->getInstr(),
2866  DAG->getPressureDiff(Cand.SU),
2867  Cand.RPDelta,
2868  DAG->getRegionCriticalPSets(),
2869  DAG->getRegPressure().MaxSetPressure);
2870  }
2871  }
2872  }
2873  DEBUG(if (Cand.RPDelta.Excess.isValid())
2874  dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
2875  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2876  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2877 }
2878 
2879 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2880 /// hierarchical. This may be more efficient than a graduated cost model because
2881 /// we don't need to evaluate all aspects of the model for each node in the
2882 /// queue. But it's really done to make the heuristics easier to debug and
2883 /// statistically analyze.
2884 ///
2885 /// \param Cand provides the policy and current best candidate.
2886 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2887 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2888 /// if Cand is from a different zone than TryCand.
2889 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2890  SchedCandidate &TryCand,
2891  SchedBoundary *Zone) {
2892  // Initialize the candidate if needed.
2893  if (!Cand.isValid()) {
2894  TryCand.Reason = NodeOrder;
2895  return;
2896  }
2897 
2898  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2899  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2900  TryCand, Cand, PhysRegCopy))
2901  return;
2902 
2903  // Avoid exceeding the target's limit.
2904  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2905  Cand.RPDelta.Excess,
2906  TryCand, Cand, RegExcess, TRI,
2907  DAG->MF))
2908  return;
2909 
2910  // Avoid increasing the max critical pressure in the scheduled region.
2911  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2912  Cand.RPDelta.CriticalMax,
2913  TryCand, Cand, RegCritical, TRI,
2914  DAG->MF))
2915  return;
2916 
2917  // We only compare a subset of features when comparing nodes between
2918  // Top and Bottom boundary. Some properties are simply incomparable, in many
2919  // other instances we should only override the other boundary if something
2920  // is a clear good pick on one boundary. Skip heuristics that are more
2921  // "tie-breaking" in nature.
2922  bool SameBoundary = Zone != nullptr;
2923  if (SameBoundary) {
2924  // For loops that are acyclic path limited, aggressively schedule for
2925  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2926  // heuristics to take precedence.
2927  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2928  tryLatency(TryCand, Cand, *Zone))
2929  return;
2930 
2931  // Prioritize instructions that read unbuffered resources by stall cycles.
2932  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2933  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2934  return;
2935  }
2936 
2937  // Keep clustered nodes together to encourage downstream peephole
2938  // optimizations which may reduce resource requirements.
2939  //
2940  // This is a best effort to set things up for a post-RA pass. Optimizations
2941  // like generating loads of multiple registers should ideally be done within
2942  // the scheduler pass by combining the loads during DAG postprocessing.
2943  const SUnit *CandNextClusterSU =
2944  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2945  const SUnit *TryCandNextClusterSU =
2946  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2947  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2948  Cand.SU == CandNextClusterSU,
2949  TryCand, Cand, Cluster))
2950  return;
2951 
2952  if (SameBoundary) {
2953  // Weak edges are for clustering and other constraints.
2954  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2955  getWeakLeft(Cand.SU, Cand.AtTop),
2956  TryCand, Cand, Weak))
2957  return;
2958  }
2959 
2960  // Avoid increasing the max pressure of the entire region.
2961  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2962  Cand.RPDelta.CurrentMax,
2963  TryCand, Cand, RegMax, TRI,
2964  DAG->MF))
2965  return;
2966 
2967  if (SameBoundary) {
2968  // Avoid critical resource consumption and balance the schedule.
2969  TryCand.initResourceDelta(DAG, SchedModel);
2970  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2971  TryCand, Cand, ResourceReduce))
2972  return;
2973  if (tryGreater(TryCand.ResDelta.DemandedResources,
2974  Cand.ResDelta.DemandedResources,
2975  TryCand, Cand, ResourceDemand))
2976  return;
2977 
2978  // Avoid serializing long latency dependence chains.
2979  // For acyclic path limited loops, latency was already checked above.
2980  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2981  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2982  return;
2983 
2984  // Fall through to original instruction order.
2985  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2986  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2987  TryCand.Reason = NodeOrder;
2988  }
2989  }
2990 }
2991 
2992 /// Pick the best candidate from the queue.
2993 ///
2994 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2995 /// DAG building. To adjust for the current scheduling location we need to
2996 /// maintain the number of vreg uses remaining to be top-scheduled.
2997 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2998  const CandPolicy &ZonePolicy,
2999  const RegPressureTracker &RPTracker,
3000  SchedCandidate &Cand) {
3001  // getMaxPressureDelta temporarily modifies the tracker.
3002  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3003 
3004  ReadyQueue &Q = Zone.Available;
3005  for (SUnit *SU : Q) {
3006 
3007  SchedCandidate TryCand(ZonePolicy);
3008  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3009  // Pass SchedBoundary only when comparing nodes from the same boundary.
3010  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3011  tryCandidate(Cand, TryCand, ZoneArg);
3012  if (TryCand.Reason != NoCand) {
3013  // Initialize resource delta if needed in case future heuristics query it.
3014  if (TryCand.ResDelta == SchedResourceDelta())
3015  TryCand.initResourceDelta(DAG, SchedModel);
3016  Cand.setBest(TryCand);
3017  DEBUG(traceCandidate(Cand));
3018  }
3019  }
3020 }
3021 
3022 /// Pick the best candidate node from either the top or bottom queue.
3023 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3024  // Schedule as far as possible in the direction of no choice. This is most
3025  // efficient, but also provides the best heuristics for CriticalPSets.
3026  if (SUnit *SU = Bot.pickOnlyChoice()) {
3027  IsTopNode = false;
3028  tracePick(Only1, false);
3029  return SU;
3030  }
3031  if (SUnit *SU = Top.pickOnlyChoice()) {
3032  IsTopNode = true;
3033  tracePick(Only1, true);
3034  return SU;
3035  }
3036  // Set the bottom-up policy based on the state of the current bottom zone and
3037  // the instructions outside the zone, including the top zone.
3038  CandPolicy BotPolicy;
3039  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3040  // Set the top-down policy based on the state of the current top zone and
3041  // the instructions outside the zone, including the bottom zone.
3042  CandPolicy TopPolicy;
3043  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3044 
3045  // See if BotCand is still valid (because we previously scheduled from Top).
3046  DEBUG(dbgs() << "Picking from Bot:\n");
3047  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3048  BotCand.Policy != BotPolicy) {
3049  BotCand.reset(CandPolicy());
3050  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3051  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3052  } else {
3053  DEBUG(traceCandidate(BotCand));
3054 #ifndef NDEBUG
3055  if (VerifyScheduling) {
3056  SchedCandidate TCand;
3057  TCand.reset(CandPolicy());
3058  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3059  assert(TCand.SU == BotCand.SU &&
3060  "Last pick result should correspond to re-picking right now");
3061  }
3062 #endif
3063  }
3064 
3065  // Check if the top Q has a better candidate.
3066  DEBUG(dbgs() << "Picking from Top:\n");
3067  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3068  TopCand.Policy != TopPolicy) {
3069  TopCand.reset(CandPolicy());
3070  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3071  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3072  } else {
3073  DEBUG(traceCandidate(TopCand));
3074 #ifndef NDEBUG
3075  if (VerifyScheduling) {
3076  SchedCandidate TCand;
3077  TCand.reset(CandPolicy());
3078  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3079  assert(TCand.SU == TopCand.SU &&
3080  "Last pick result should correspond to re-picking right now");
3081  }
3082 #endif
3083  }
3084 
3085  // Pick best from BotCand and TopCand.
3086  assert(BotCand.isValid());
3087  assert(TopCand.isValid());
3088  SchedCandidate Cand = BotCand;
3089  TopCand.Reason = NoCand;
3090  tryCandidate(Cand, TopCand, nullptr);
3091  if (TopCand.Reason != NoCand) {
3092  Cand.setBest(TopCand);
3093  DEBUG(traceCandidate(Cand));
3094  }
3095 
3096  IsTopNode = Cand.AtTop;
3097  tracePick(Cand);
3098  return Cand.SU;
3099 }
3100 
3101 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3102 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3103  if (DAG->top() == DAG->bottom()) {
3104  assert(Top.Available.empty() && Top.Pending.empty() &&
3105  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3106  return nullptr;
3107  }
3108  SUnit *SU;
3109  do {
3110  if (RegionPolicy.OnlyTopDown) {
3111  SU = Top.pickOnlyChoice();
3112  if (!SU) {
3113  CandPolicy NoPolicy;
3114  TopCand.reset(NoPolicy);
3115  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3116  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3117  tracePick(TopCand);
3118  SU = TopCand.SU;
3119  }
3120  IsTopNode = true;
3121  } else if (RegionPolicy.OnlyBottomUp) {
3122  SU = Bot.pickOnlyChoice();
3123  if (!SU) {
3124  CandPolicy NoPolicy;
3125  BotCand.reset(NoPolicy);
3126  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3127  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3128  tracePick(BotCand);
3129  SU = BotCand.SU;
3130  }
3131  IsTopNode = false;
3132  } else {
3133  SU = pickNodeBidirectional(IsTopNode);
3134  }
3135  } while (SU->isScheduled);
3136 
3137  if (SU->isTopReady())
3138  Top.removeReady(SU);
3139  if (SU->isBottomReady())
3140  Bot.removeReady(SU);
3141 
3142  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3143  return SU;
3144 }
3145 
3146 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3147  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3148  if (!isTop)
3149  ++InsertPos;
3150  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3151 
3152  // Find already scheduled copies with a single physreg dependence and move
3153  // them just above the scheduled instruction.
3154  for (SDep &Dep : Deps) {
3155  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3156  continue;
3157  SUnit *DepSU = Dep.getSUnit();
3158  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3159  continue;
3160  MachineInstr *Copy = DepSU->getInstr();
3161  if (!Copy->isCopy())
3162  continue;
3163  DEBUG(dbgs() << " Rescheduling physreg copy ";
3164  Dep.getSUnit()->dump(DAG));
3165  DAG->moveInstruction(Copy, InsertPos);
3166  }
3167 }
3168 
3169 /// Update the scheduler's state after scheduling a node. This is the same node
3170 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3171 /// update it's state based on the current cycle before MachineSchedStrategy
3172 /// does.
3173 ///
3174 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3175 /// them here. See comments in biasPhysRegCopy.
3176 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3177  if (IsTopNode) {
3178  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3179  Top.bumpNode(SU);
3180  if (SU->hasPhysRegUses)
3181  reschedulePhysRegCopies(SU, true);
3182  } else {
3183  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3184  Bot.bumpNode(SU);
3185  if (SU->hasPhysRegDefs)
3186  reschedulePhysRegCopies(SU, false);
3187  }
3188 }
3189 
3190 /// Create the standard converging machine scheduler. This will be used as the
3191 /// default scheduler if the target does not set a default.
3192 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3193  ScheduleDAGMILive *DAG =
3194  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3195  // Register DAG post-processors.
3196  //
3197  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3198  // data and pass it to later mutations. Have a single mutation that gathers
3199  // the interesting nodes in one pass.
3200  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3201  return DAG;
3202 }
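// Editorial sketch (not part of MachineScheduler.cpp): a target or plugin can
// reuse the same driver with its own strategy and expose it through the
// "misched" registry. "MyStrategy" and "mysched" below are hypothetical names
// used only for illustration:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<MyStrategy>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("mysched", "Example custom scheduler.", createMySched);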
3203 
3204 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3205  return createGenericSchedLive(C);
3206 }
3207 
3208 static MachineSchedRegistry
3209 GenericSchedRegistry("converge", "Standard converging scheduler.",
3210  createConveringSched);
3211 
3212 //===----------------------------------------------------------------------===//
3213 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3214 //===----------------------------------------------------------------------===//
3215 
3216 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3217  DAG = Dag;
3218  SchedModel = DAG->getSchedModel();
3219  TRI = DAG->TRI;
3220 
3221  Rem.init(DAG, SchedModel);
3222  Top.init(DAG, SchedModel, &Rem);
3223  BotRoots.clear();
3224 
3225  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3226  // or are disabled, then these HazardRecs will be disabled.
3227  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3228  if (!Top.HazardRec) {
3229  Top.HazardRec =
3230  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3231  Itin, DAG);
3232  }
3233 }
3234 
3235 void PostGenericScheduler::registerRoots() {
3236  Rem.CriticalPath = DAG->ExitSU.getDepth();
3237 
3238  // Some roots may not feed into ExitSU. Check all of them in case.
3239  for (const SUnit *SU : BotRoots) {
3240  if (SU->getDepth() > Rem.CriticalPath)
3241  Rem.CriticalPath = SU->getDepth();
3242  }
3243  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3244  if (DumpCriticalPathLength) {
3245  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3246  }
3247 }
3248 
3249 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3250 ///
3251 /// \param Cand provides the policy and current best candidate.
3252 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3253 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3254  SchedCandidate &TryCand) {
3255  // Initialize the candidate if needed.
3256  if (!Cand.isValid()) {
3257  TryCand.Reason = NodeOrder;
3258  return;
3259  }
3260 
3261  // Prioritize instructions that read unbuffered resources by stall cycles.
3262  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3263  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3264  return;
3265 
3266  // Keep clustered nodes together.
3267  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3268  Cand.SU == DAG->getNextClusterSucc(),
3269  TryCand, Cand, Cluster))
3270  return;
3271 
3272  // Avoid critical resource consumption and balance the schedule.
3273  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3274  TryCand, Cand, ResourceReduce))
3275  return;
3276  if (tryGreater(TryCand.ResDelta.DemandedResources,
3277  Cand.ResDelta.DemandedResources,
3278  TryCand, Cand, ResourceDemand))
3279  return;
3280 
3281  // Avoid serializing long latency dependence chains.
3282  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3283  return;
3284  }
3285 
3286  // Fall through to original instruction order.
3287  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3288  TryCand.Reason = NodeOrder;
3289 }
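// Editorial note: the post-RA tie-breakers above apply in a fixed order
// (unbuffered-resource stalls, memop clustering, critical resource reduction,
// demanded resource balance, latency, then original node order); the first
// heuristic that distinguishes the two candidates decides the pick.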
3290 
3291 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3292  ReadyQueue &Q = Top.Available;
3293  for (SUnit *SU : Q) {
3294  SchedCandidate TryCand(Cand.Policy);
3295  TryCand.SU = SU;
3296  TryCand.AtTop = true;
3297  TryCand.initResourceDelta(DAG, SchedModel);
3298  tryCandidate(Cand, TryCand);
3299  if (TryCand.Reason != NoCand) {
3300  Cand.setBest(TryCand);
3301  DEBUG(traceCandidate(Cand));
3302  }
3303  }
3304 }
3305 
3306 /// Pick the next node to schedule.
3307 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3308  if (DAG->top() == DAG->bottom()) {
3309  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3310  return nullptr;
3311  }
3312  SUnit *SU;
3313  do {
3314  SU = Top.pickOnlyChoice();
3315  if (SU) {
3316  tracePick(Only1, true);
3317  } else {
3318  CandPolicy NoPolicy;
3319  SchedCandidate TopCand(NoPolicy);
3320  // Set the top-down policy based on the state of the current top zone and
3321  // the instructions outside the zone, including the bottom zone.
3322  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3323  pickNodeFromQueue(TopCand);
3324  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3325  tracePick(TopCand);
3326  SU = TopCand.SU;
3327  }
3328  } while (SU->isScheduled);
3329 
3330  IsTopNode = true;
3331  Top.removeReady(SU);
3332 
3333  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3334  return SU;
3335 }
3336 
3337 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3338 /// scheduled/remaining flags in the DAG nodes.
3339 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3340  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3341  Top.bumpNode(SU);
3342 }
3343 
3344 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3345  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3346  /*RemoveKillFlags=*/true);
3347 }
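// Editorial note: this factory backs the post-RA machine scheduler, which
// typically runs only when the target opts in (enablePostRAScheduler()) or
// when forced with -enable-post-misched. RemoveKillFlags is set because kill
// flags become stale once instructions are reordered after register
// allocation and have to be recomputed.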
3348 
3349 //===----------------------------------------------------------------------===//
3350 // ILP Scheduler. Currently for experimental analysis of heuristics.
3351 //===----------------------------------------------------------------------===//
3352 
3353 namespace {
3354 
3355 /// \brief Order nodes by the ILP metric.
3356 struct ILPOrder {
3357  const SchedDFSResult *DFSResult = nullptr;
3358  const BitVector *ScheduledTrees = nullptr;
3359  bool MaximizeILP;
3360 
3361  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3362 
3363  /// \brief Apply a less-than relation on node priority.
3364  ///
3365  /// (Return true if A comes after B in the Q.)
3366  bool operator()(const SUnit *A, const SUnit *B) const {
3367  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3368  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3369  if (SchedTreeA != SchedTreeB) {
3370  // Unscheduled trees have lower priority.
3371  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3372  return ScheduledTrees->test(SchedTreeB);
3373 
3374  // Trees with shallower connections have lower priority.
3375  if (DFSResult->getSubtreeLevel(SchedTreeA)
3376  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3377  return DFSResult->getSubtreeLevel(SchedTreeA)
3378  < DFSResult->getSubtreeLevel(SchedTreeB);
3379  }
3380  }
3381  if (MaximizeILP)
3382  return DFSResult->getILP(A) < DFSResult->getILP(B);
3383  else
3384  return DFSResult->getILP(A) > DFSResult->getILP(B);
3385  }
3386 };
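// Editorial note: this comparator is the "less than" used with
// std::make_heap/push_heap/pop_heap on ReadyQ below, so returning true means A
// has lower priority than B; pop_heap then moves the highest-priority SUnit to
// ReadyQ.back(), which is exactly what pickNode() pops.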
3387 
3388 /// \brief Schedule based on the ILP metric.
3389 class ILPScheduler : public MachineSchedStrategy {
3390  ScheduleDAGMILive *DAG = nullptr;
3391  ILPOrder Cmp;
3392 
3393  std::vector<SUnit*> ReadyQ;
3394 
3395 public:
3396  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3397 
3398  void initialize(ScheduleDAGMI *dag) override {
3399  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3400  DAG = static_cast<ScheduleDAGMILive*>(dag);
3401  DAG->computeDFSResult();
3402  Cmp.DFSResult = DAG->getDFSResult();
3403  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3404  ReadyQ.clear();
3405  }
3406 
3407  void registerRoots() override {
3408  // Restore the heap in ReadyQ with the updated DFS results.
3409  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3410  }
3411 
3412  /// Implement MachineSchedStrategy interface.
3413  /// -----------------------------------------
3414 
3415  /// Callback to select the highest priority node from the ready Q.
3416  SUnit *pickNode(bool &IsTopNode) override {
3417  if (ReadyQ.empty()) return nullptr;
3418  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3419  SUnit *SU = ReadyQ.back();
3420  ReadyQ.pop_back();
3421  IsTopNode = false;
3422  DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3423  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3424  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3425  << DAG->getDFSResult()->getSubtreeLevel(
3426  DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3427  << "Scheduling " << *SU->getInstr());
3428  return SU;
3429  }
3430 
3431  /// \brief Scheduler callback to notify that a new subtree is scheduled.
3432  void scheduleTree(unsigned SubtreeID) override {
3433  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3434  }
3435 
3436  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3437  /// DFSResults, and resort the priority Q.
3438  void schedNode(SUnit *SU, bool IsTopNode) override {
3439  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3440  }
3441 
3442  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3443 
3444  void releaseBottomNode(SUnit *SU) override {
3445  ReadyQ.push_back(SU);
3446  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3447  }
3448 };
3449 
3450 } // end anonymous namespace
3451 
3452 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3453  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3454 }
3455 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3456  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3457 }
3458 
3460  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3462  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3463 
3464 //===----------------------------------------------------------------------===//
3465 // Machine Instruction Shuffler for Correctness Testing
3466 //===----------------------------------------------------------------------===//
3467 
3468 #ifndef NDEBUG
3469 namespace {
3470 
3471 /// Apply a less-than relation on the node order, which corresponds to the
3472 /// instruction order prior to scheduling. IsReverse implements greater-than.
3473 template<bool IsReverse>
3474 struct SUnitOrder {
3475  bool operator()(SUnit *A, SUnit *B) const {
3476  if (IsReverse)
3477  return A->NodeNum > B->NodeNum;
3478  else
3479  return A->NodeNum < B->NodeNum;
3480  }
3481 };
3482 
3483 /// Reorder instructions as much as possible.
3484 class InstructionShuffler : public MachineSchedStrategy {
3485  bool IsAlternating;
3486  bool IsTopDown;
3487 
3488  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3489  // gives nodes with a higher number higher priority causing the latest
3490  // instructions to be scheduled first.
3491  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3492  TopQ;
3493 
3494  // When scheduling bottom-up, use greater-than as the queue priority.
3495  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3496  BottomQ;
3497 
3498 public:
3499  InstructionShuffler(bool alternate, bool topdown)
3500  : IsAlternating(alternate), IsTopDown(topdown) {}
3501 
3502  void initialize(ScheduleDAGMI*) override {
3503  TopQ.clear();
3504  BottomQ.clear();
3505  }
3506 
3507  /// Implement MachineSchedStrategy interface.
3508  /// -----------------------------------------
3509 
3510  SUnit *pickNode(bool &IsTopNode) override {
3511  SUnit *SU;
3512  if (IsTopDown) {
3513  do {
3514  if (TopQ.empty()) return nullptr;
3515  SU = TopQ.top();
3516  TopQ.pop();
3517  } while (SU->isScheduled);
3518  IsTopNode = true;
3519  } else {
3520  do {
3521  if (BottomQ.empty()) return nullptr;
3522  SU = BottomQ.top();
3523  BottomQ.pop();
3524  } while (SU->isScheduled);
3525  IsTopNode = false;
3526  }
3527  if (IsAlternating)
3528  IsTopDown = !IsTopDown;
3529  return SU;
3530  }
3531 
3532  void schedNode(SUnit *SU, bool IsTopNode) override {}
3533 
3534  void releaseTopNode(SUnit *SU) override {
3535  TopQ.push(SU);
3536  }
3537  void releaseBottomNode(SUnit *SU) override {
3538  BottomQ.push(SU);
3539  }
3540 };
3541 
3542 } // end anonymous namespace
3543 
3544 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3545  bool Alternate = !ForceTopDown && !ForceBottomUp;
3546  bool TopDown = !ForceBottomUp;
3547  assert((TopDown || !ForceTopDown) &&
3548  "-misched-topdown incompatible with -misched-bottomup");
3549  return new ScheduleDAGMILive(
3550  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3551 }
3552 
3554  "shuffle", "Shuffle machine instructions alternating directions",
3556 #endif // !NDEBUG
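// Editorial usage note: the shuffler exists only in assertion (!NDEBUG) builds
// and is selected with -misched=shuffle; adding -misched-topdown or
// -misched-bottomup pins a single direction instead of alternating, per the
// Alternate/TopDown logic above.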
3557 
3558 //===----------------------------------------------------------------------===//
3559 // GraphWriter support for ScheduleDAGMILive.
3560 //===----------------------------------------------------------------------===//
3561 
3562 #ifndef NDEBUG
3563 namespace llvm {
3564 
3565 template<> struct GraphTraits<
3566  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3567 
3568 template<>
3569 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3570  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3571 
3572  static std::string getGraphName(const ScheduleDAG *G) {
3573  return G->MF.getName();
3574  }
3575 
3576  static bool renderGraphFromBottomUp() {
3577  return true;
3578  }
3579 
3580  static bool isNodeHidden(const SUnit *Node) {
3581  if (ViewMISchedCutoff == 0)
3582  return false;
3583  return (Node->Preds.size() > ViewMISchedCutoff
3584  || Node->Succs.size() > ViewMISchedCutoff);
3585  }
3586 
3587  /// If you want to override the dot attributes printed for a particular
3588  /// edge, override this method.
3589  static std::string getEdgeAttributes(const SUnit *Node,
3590  SUnitIterator EI,
3591  const ScheduleDAG *Graph) {
3592  if (EI.isArtificialDep())
3593  return "color=cyan,style=dashed";
3594  if (EI.isCtrlDep())
3595  return "color=blue,style=dashed";
3596  return "";
3597  }
3598 
3599  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3600  std::string Str;
3601  raw_string_ostream SS(Str);
3602  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3603  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3604  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3605  SS << "SU:" << SU->NodeNum;
3606  if (DFS)
3607  SS << " I:" << DFS->getNumInstrs(SU);
3608  return SS.str();
3609  }
3610 
3611  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3612  return G->getGraphNodeLabel(SU);
3613  }
3614 
3615  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3616  std::string Str("shape=Mrecord");
3617  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3618  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3619  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3620  if (DFS) {
3621  Str += ",style=filled,fillcolor=\"#";
3622  Str += DOT::getColorString(DFS->getSubtreeID(N));
3623  Str += '"';
3624  }
3625  return Str;
3626  }
3627 };
3628 
3629 } // end namespace llvm
3630 #endif // NDEBUG
3631 
3632 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3633 /// rendered using 'dot'.
3634 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3635 #ifndef NDEBUG
3636  ViewGraph(this, Name, false, Title);
3637 #else
3638  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3639  << "systems with Graphviz or gv!\n";
3640 #endif // NDEBUG
3641 }
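// Editorial usage note: in assertion builds the scheduler can render these
// graphs automatically when -view-misched-dags is passed, and
// -view-misched-cutoff=N hides nodes with more than N preds/succs via
// isNodeHidden() above; in NDEBUG builds this method only emits the error
// message shown in the #else branch.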
3642 
3643 /// Out-of-line implementation with no arguments is handy for gdb.
3644 void ScheduleDAGMI::viewGraph() {
3645  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3646 }