MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph; provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
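// Editor's note (not part of the original file): a brief usage sketch for the
// debugging knobs declared above. The flag names match the cl::opts in this
// file; the llc invocation is only an assumed example:
//
//   llc foo.ll -verify-misched -misched-cutoff=20 \
//       -misched-only-func=main -misched-print-dags
//
// The options guarded by #ifndef NDEBUG exist only in asserts-enabled builds,
// and -debug-only=machine-scheduler (see DEBUG_TYPE above) enables the
// LLVM_DEBUG traces emitted throughout this file.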
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
140 MachineSchedContext::MachineSchedContext() {
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
144 MachineSchedContext::~MachineSchedContext() {
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
196 char &llvm::MachineSchedulerID = MachineScheduler::ID;
197 
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205  "Machine Instruction Scheduler", false, false)
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
208  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
217  AU.addRequired<SlotIndexes>();
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
226 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
227 
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
232  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
241 }
242 
245 
246 /// A dummy default scheduler factory indicates whether the scheduler
247 /// is overridden on the command line.
248 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
249  return nullptr;
250 }
251 
252 /// MachineSchedOpt allows command line selection of the scheduler.
253 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
254  RegisterPassParser<MachineSchedRegistry>>
255 MachineSchedOpt("misched",
256  cl::init(&useDefaultMachineSched), cl::Hidden,
257  cl::desc("Machine instruction scheduler to use"));
258 
259 static MachineSchedRegistry
260 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
261  useDefaultMachineSched);
262 
263 static cl::opt<bool> EnableMachineSched(
264  "enable-misched",
265  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
266  cl::Hidden);
267 
268 static cl::opt<bool> EnablePostRAMachineSched(
269  "enable-post-misched",
270  cl::desc("Enable the post-ra machine instruction scheduling pass."),
271  cl::init(true), cl::Hidden);
272 
273 /// Decrement this iterator until reaching the top or a non-debug instr.
274 static MachineBasicBlock::const_iterator
275 priorNonDebug(MachineBasicBlock::const_iterator I,
276  MachineBasicBlock::const_iterator Beg) {
277  assert(I != Beg && "reached the top of the region, cannot decrement");
278  while (--I != Beg) {
279  if (!I->isDebugInstr())
280  break;
281  }
282  return I;
283 }
284 
285 /// Non-const version.
291 }
292 
293 /// If this iterator is a debug value, increment until reaching the End or a
294 /// non-debug instruction.
295 static MachineBasicBlock::const_iterator
296 nextIfDebug(MachineBasicBlock::const_iterator I,
297  MachineBasicBlock::const_iterator End) {
298  for (; I != End; ++I) {
299  if (!I->isDebugInstr())
300  break;
301  }
302  return I;
303 }
304 
305 /// Non-const version.
311 }
312 
313 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
314 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
315  // Select the scheduler, or set the default.
316  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
317  if (Ctor != useDefaultMachineSched)
318  return Ctor(this);
319 
320  // Get the default scheduler set by the target for this function.
321  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
322  if (Scheduler)
323  return Scheduler;
324 
325  // Default to GenericScheduler.
326  return createGenericSchedLive(this);
327 }
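// Editor's note (not part of the original file): a minimal sketch, with
// hypothetical names, of how an out-of-tree or target scheduler hooks into the
// selection chain above. Registering a factory with MachineSchedRegistry makes
// it selectable via -misched=<name>, mirroring DefaultSchedRegistry; this
// factory simply reuses the generic live scheduler and attaches one extra DAG
// mutation:
//
//   static ScheduleDAGInstrs *createExampleSched(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }
//   static MachineSchedRegistry
//   ExampleSchedRegistry("example-sched", "Hypothetical example scheduler",
//                        createExampleSched);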
328 
329 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
330 /// the caller. We don't have a command line option to override the postRA
331 /// scheduler. The Target must configure it.
332 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
333  // Get the postRA scheduler set by the target for this function.
334  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
335  if (Scheduler)
336  return Scheduler;
337 
338  // Default to GenericScheduler.
339  return createGenericSchedPostRA(this);
340 }
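// Editor's note (not part of the original file): a minimal sketch, for a
// hypothetical "Foo" target, of the hook consumed above. A backend overrides
// TargetPassConfig::createPostMachineScheduler() and returns its own DAG;
// returning what createGenericSchedPostRA() would build is the common case:
//
//   ScheduleDAGInstrs *
//   FooPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
//                              /*RemoveKillFlags=*/true);
//   }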
341 
342 /// Top-level MachineScheduler pass driver.
343 ///
344 /// Visit blocks in function order. Divide each block into scheduling regions
345 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
346 /// consistent with the DAG builder, which traverses the interior of the
347 /// scheduling regions bottom-up.
348 ///
349 /// This design avoids exposing scheduling boundaries to the DAG builder,
350 /// simplifying the DAG builder's support for "special" target instructions.
351 /// At the same time the design allows target schedulers to operate across
352 /// scheduling boundaries, for example to bundle the boundary instructions
353 /// without reordering them. This creates complexity, because the target
354 /// scheduler must update the RegionBegin and RegionEnd positions cached by
355 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
356 /// design would be to split blocks at scheduling boundaries, but LLVM has a
357 /// general bias against block splitting purely for implementation simplicity.
358 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
359  if (skipFunction(mf.getFunction()))
360  return false;
361 
362  if (EnableMachineSched.getNumOccurrences()) {
363  if (!EnableMachineSched)
364  return false;
365  } else if (!mf.getSubtarget().enableMachineScheduler())
366  return false;
367 
368  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
369 
370  // Initialize the context of the pass.
371  MF = &mf;
372  MLI = &getAnalysis<MachineLoopInfo>();
373  MDT = &getAnalysis<MachineDominatorTree>();
374  PassConfig = &getAnalysis<TargetPassConfig>();
375  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
376 
377  LIS = &getAnalysis<LiveIntervals>();
378 
379  if (VerifyScheduling) {
380  LLVM_DEBUG(LIS->dump());
381  MF->verify(this, "Before machine scheduling.");
382  }
383  RegClassInfo->runOnMachineFunction(*MF);
384 
385  // Instantiate the selected scheduler for this target, function, and
386  // optimization level.
387  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
388  scheduleRegions(*Scheduler, false);
389 
390  LLVM_DEBUG(LIS->dump());
391  if (VerifyScheduling)
392  MF->verify(this, "After machine scheduling.");
393  return true;
394 }
395 
396 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
397  if (skipFunction(mf.getFunction()))
398  return false;
399 
400  if (EnablePostRAMachineSched.getNumOccurrences()) {
401  if (!EnablePostRAMachineSched)
402  return false;
403  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
404  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
405  return false;
406  }
407  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
408 
409  // Initialize the context of the pass.
410  MF = &mf;
411  MLI = &getAnalysis<MachineLoopInfo>();
412  PassConfig = &getAnalysis<TargetPassConfig>();
413 
414  if (VerifyScheduling)
415  MF->verify(this, "Before post machine scheduling.");
416 
417  // Instantiate the selected scheduler for this target, function, and
418  // optimization level.
419  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
420  scheduleRegions(*Scheduler, true);
421 
422  if (VerifyScheduling)
423  MF->verify(this, "After post machine scheduling.");
424  return true;
425 }
426 
427 /// Return true if the given instruction should not be included in a scheduling
428 /// region.
429 ///
430 /// MachineScheduler does not currently support scheduling across calls. To
431 /// handle calls, the DAG builder needs to be modified to create register
432 /// anti/output dependencies on the registers clobbered by the call's regmask
433 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
434 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
435 /// the boundary, but there would be no benefit to postRA scheduling across
436 /// calls this late anyway.
437 static bool isSchedBoundary(MachineInstr *MI,
438  MachineBasicBlock *MBB,
439  MachineFunction *MF,
440  const TargetInstrInfo *TII) {
441  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
442 }
443 
444 /// A region of an MBB for scheduling.
445 namespace {
446 struct SchedRegion {
447  /// RegionBegin is the first instruction in the scheduling region, and
448  /// RegionEnd is either MBB->end() or the scheduling boundary after the
449  /// last instruction in the scheduling region. These iterators cannot refer
450  /// to instructions outside of the identified scheduling region because
451  /// those may be reordered before scheduling this region.
452  MachineBasicBlock::iterator RegionBegin;
453  MachineBasicBlock::iterator RegionEnd;
454  unsigned NumRegionInstrs;
455 
456  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
457  unsigned N) :
458  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
459 };
460 } // end anonymous namespace
461 
462 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
463 
464 static void
465 getSchedRegions(MachineBasicBlock *MBB,
466  MBBRegionsVector &Regions,
467  bool RegionsTopDown) {
468  MachineFunction *MF = MBB->getParent();
469  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
470 
471  MachineBasicBlock::iterator I = nullptr;
472  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
473  RegionEnd != MBB->begin(); RegionEnd = I) {
474 
475  // Avoid decrementing RegionEnd for blocks with no terminator.
476  if (RegionEnd != MBB->end() ||
477  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
478  --RegionEnd;
479  }
480 
481  // The next region starts above the previous region. Look backward in the
482  // instruction stream until we find the nearest boundary.
483  unsigned NumRegionInstrs = 0;
484  I = RegionEnd;
485  for (;I != MBB->begin(); --I) {
486  MachineInstr &MI = *std::prev(I);
487  if (isSchedBoundary(&MI, &*MBB, MF, TII))
488  break;
489  if (!MI.isDebugInstr()) {
490  // MBB::size() uses instr_iterator to count. Here we need a bundle to
491  // count as a single instruction.
492  ++NumRegionInstrs;
493  }
494  }
495 
496  // It's possible we found a scheduling region that only has debug
497  // instructions. Don't bother scheduling these.
498  if (NumRegionInstrs != 0)
499  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
500  }
501 
502  if (RegionsTopDown)
503  std::reverse(Regions.begin(), Regions.end());
504 }
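// Editor's note (not part of the original file): a small worked example, with
// an assumed instruction sequence, of how the walk above carves a block into
// regions:
//
//   %1 = LOAD ...
//   %2 = ADD %1, ...
//   CALL @fn            ; isSchedBoundary() -> true
//   %3 = MUL %2, ...
//   RET                 ; terminator, also a boundary
//
// The bottom-up walk records the regions [%3, RET) and then [%1, CALL); with
// RegionsTopDown the vector is reversed so [%1, CALL) is scheduled first. The
// CALL and RET themselves are never reordered.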
505 
506 /// Main driver for both MachineScheduler and PostMachineScheduler.
507 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
508  bool FixKillFlags) {
509  // Visit all machine basic blocks.
510  //
511  // TODO: Visit blocks in global postorder or postorder within the bottom-up
512  // loop tree. Then we can optionally compute global RegPressure.
513  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
514  MBB != MBBEnd; ++MBB) {
515 
516  Scheduler.startBlock(&*MBB);
517 
518 #ifndef NDEBUG
519  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
520  continue;
521  if (SchedOnlyBlock.getNumOccurrences()
522  && (int)SchedOnlyBlock != MBB->getNumber())
523  continue;
524 #endif
525 
526  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
527  // points to the scheduling boundary at the bottom of the region. The DAG
528  // does not include RegionEnd, but the region does (i.e. the next
529  // RegionEnd is above the previous RegionBegin). If the current block has
530  // no terminator then RegionEnd == MBB->end() for the bottom region.
531  //
532  // All the regions of MBB are first found and stored in MBBRegions, which
533  // are then processed top-down within the block if RegionsTopDown is true.
534  //
535  // The Scheduler may insert instructions during either schedule() or
536  // exitRegion(), even for empty regions. So the local iterators 'I' and
537  // 'RegionEnd' are invalid across these calls. Instructions must not be
538  // added to other regions than the current one without updating MBBRegions.
539 
540  MBBRegionsVector MBBRegions;
541  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
542  for (MBBRegionsVector::iterator R = MBBRegions.begin();
543  R != MBBRegions.end(); ++R) {
544  MachineBasicBlock::iterator I = R->RegionBegin;
545  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
546  unsigned NumRegionInstrs = R->NumRegionInstrs;
547 
548  // Notify the scheduler of the region, even if we may skip scheduling
549  // it. Perhaps it still needs to be bundled.
550  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
551 
552  // Skip empty scheduling regions (0 or 1 schedulable instructions).
553  if (I == RegionEnd || I == std::prev(RegionEnd)) {
554  // Close the current region. Bundle the terminator if needed.
555  // This invalidates 'RegionEnd' and 'I'.
556  Scheduler.exitRegion();
557  continue;
558  }
559  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
560  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
561  << " " << MBB->getName() << "\n From: " << *I
562  << " To: ";
563  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
564  else dbgs() << "End";
565  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
566  if (DumpCriticalPathLength) {
567  errs() << MF->getName();
568  errs() << ":%bb. " << MBB->getNumber();
569  errs() << " " << MBB->getName() << " \n";
570  }
571 
572  // Schedule a region: possibly reorder instructions.
573  // This invalidates the original region iterators.
574  Scheduler.schedule();
575 
576  // Close the current region.
577  Scheduler.exitRegion();
578  }
579  Scheduler.finishBlock();
580  // FIXME: Ideally, no further passes should rely on kill flags. However,
581  // thumb2 size reduction is currently an exception, so the PostMIScheduler
582  // needs to do this.
583  if (FixKillFlags)
584  Scheduler.fixupKills(*MBB);
585  }
586  Scheduler.finalizeSchedule();
587 }
588 
589 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
590  // unimplemented
591 }
592 
593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
595  dbgs() << "Queue " << Name << ": ";
596  for (const SUnit *SU : Queue)
597  dbgs() << SU->NodeNum << " ";
598  dbgs() << "\n";
599 }
600 #endif
601 
602 //===----------------------------------------------------------------------===//
603 // ScheduleDAGMI - Basic machine instruction scheduling. This is
604 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
605 // virtual registers.
606 //===----------------------------------------------------------------------===//
607 
608 // Provide a vtable anchor.
609 ScheduleDAGMI::~ScheduleDAGMI() = default;
610 
611 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
612 /// NumPredsLeft reaches zero, release the successor node.
613 ///
614 /// FIXME: Adjust SuccSU height based on MinLatency.
615 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
616  SUnit *SuccSU = SuccEdge->getSUnit();
617 
618  if (SuccEdge->isWeak()) {
619  --SuccSU->WeakPredsLeft;
620  if (SuccEdge->isCluster())
621  NextClusterSucc = SuccSU;
622  return;
623  }
624 #ifndef NDEBUG
625  if (SuccSU->NumPredsLeft == 0) {
626  dbgs() << "*** Scheduling failed! ***\n";
627  dumpNode(*SuccSU);
628  dbgs() << " has been released too many times!\n";
629  llvm_unreachable(nullptr);
630  }
631 #endif
632  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
633  // CurrCycle may have advanced since then.
634  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
635  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
636 
637  --SuccSU->NumPredsLeft;
638  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
639  SchedImpl->releaseTopNode(SuccSU);
640 }
641 
642 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
643 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
644  for (SDep &Succ : SU->Succs)
645  releaseSucc(SU, &Succ);
646 }
647 
648 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
649 /// NumSuccsLeft reaches zero, release the predecessor node.
650 ///
651 /// FIXME: Adjust PredSU height based on MinLatency.
652 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
653  SUnit *PredSU = PredEdge->getSUnit();
654 
655  if (PredEdge->isWeak()) {
656  --PredSU->WeakSuccsLeft;
657  if (PredEdge->isCluster())
658  NextClusterPred = PredSU;
659  return;
660  }
661 #ifndef NDEBUG
662  if (PredSU->NumSuccsLeft == 0) {
663  dbgs() << "*** Scheduling failed! ***\n";
664  dumpNode(*PredSU);
665  dbgs() << " has been released too many times!\n";
666  llvm_unreachable(nullptr);
667  }
668 #endif
669  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
670  // CurrCycle may have advanced since then.
671  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
672  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
673 
674  --PredSU->NumSuccsLeft;
675  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
676  SchedImpl->releaseBottomNode(PredSU);
677 }
678 
679 /// releasePredecessors - Call releasePred on each of SU's predecessors.
680 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
681  for (SDep &Pred : SU->Preds)
682  releasePred(SU, &Pred);
683 }
684 
685 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
686  ScheduleDAGInstrs::startBlock(bb);
687  SchedImpl->enterMBB(bb);
688 }
689 
690 void ScheduleDAGMI::finishBlock() {
691  SchedImpl->leaveMBB();
692  ScheduleDAGInstrs::finishBlock();
693 }
694 
695 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
696 /// crossing a scheduling boundary. [begin, end) includes all instructions in
697 /// the region, including the boundary itself and single-instruction regions
698 /// that don't get scheduled.
699 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
700  MachineBasicBlock::iterator begin,
701  MachineBasicBlock::iterator end,
702  unsigned regioninstrs)
703 {
704  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
705 
706  SchedImpl->initPolicy(begin, end, regioninstrs);
707 }
708 
709 /// This is normally called from the main scheduler loop but may also be invoked
710 /// by the scheduling strategy to perform additional code motion.
711 void ScheduleDAGMI::moveInstruction(
712  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
713  // Advance RegionBegin if the first instruction moves down.
714  if (&*RegionBegin == MI)
715  ++RegionBegin;
716 
717  // Update the instruction stream.
718  BB->splice(InsertPos, BB, MI);
719 
720  // Update LiveIntervals
721  if (LIS)
722  LIS->handleMove(*MI, /*UpdateFlags=*/true);
723 
724  // Recede RegionBegin if an instruction moves above the first.
725  if (RegionBegin == InsertPos)
726  RegionBegin = MI;
727 }
728 
729 bool ScheduleDAGMI::checkSchedLimit() {
730 #ifndef NDEBUG
731  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
732  CurrentTop = CurrentBottom;
733  return false;
734  }
735  ++NumInstrsScheduled;
736 #endif
737  return true;
738 }
739 
740 /// Per-region scheduling driver, called back from
741 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
742 /// does not consider liveness or register pressure. It is useful for PostRA
743 /// scheduling and potentially other custom schedulers.
744 void ScheduleDAGMI::schedule() {
745  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
746  LLVM_DEBUG(SchedImpl->dumpPolicy());
747 
748  // Build the DAG.
749  buildSchedGraph(AA);
750 
751  postprocessDAG();
752 
753  SmallVector<SUnit*, 8> TopRoots, BotRoots;
754  findRootsAndBiasEdges(TopRoots, BotRoots);
755 
756  LLVM_DEBUG(dump());
757  if (PrintDAGs) dump();
758  if (ViewMISchedDAGs) viewGraph();
759 
760  // Initialize the strategy before modifying the DAG.
761  // This may initialize a DFSResult to be used for queue priority.
762  SchedImpl->initialize(this);
763 
764  // Initialize ready queues now that the DAG and priority data are finalized.
765  initQueues(TopRoots, BotRoots);
766 
767  bool IsTopNode = false;
768  while (true) {
769  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
770  SUnit *SU = SchedImpl->pickNode(IsTopNode);
771  if (!SU) break;
772 
773  assert(!SU->isScheduled && "Node already scheduled");
774  if (!checkSchedLimit())
775  break;
776 
777  MachineInstr *MI = SU->getInstr();
778  if (IsTopNode) {
779  assert(SU->isTopReady() && "node still has unscheduled dependencies");
780  if (&*CurrentTop == MI)
781  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
782  else
783  moveInstruction(MI, CurrentTop);
784  } else {
785  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
786  MachineBasicBlock::iterator priorII =
787  priorNonDebug(CurrentBottom, CurrentTop);
788  if (&*priorII == MI)
789  CurrentBottom = priorII;
790  else {
791  if (&*CurrentTop == MI)
792  CurrentTop = nextIfDebug(++CurrentTop, priorII);
793  moveInstruction(MI, CurrentBottom);
794  CurrentBottom = MI;
795  }
796  }
797  // Notify the scheduling strategy before updating the DAG.
798  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
799  // runs, it can then use the accurate ReadyCycle time to determine whether
800  // newly released nodes can move to the readyQ.
801  SchedImpl->schedNode(SU, IsTopNode);
802 
803  updateQueues(SU, IsTopNode);
804  }
805  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
806 
807  placeDebugValues();
808 
809  LLVM_DEBUG({
810  dbgs() << "*** Final schedule for "
811  << printMBBReference(*begin()->getParent()) << " ***\n";
812  dumpSchedule();
813  dbgs() << '\n';
814  });
815 }
816 
817 /// Apply each ScheduleDAGMutation step in order.
818 void ScheduleDAGMI::postprocessDAG() {
819  for (auto &m : Mutations)
820  m->apply(this);
821 }
822 
823 void ScheduleDAGMI::
824 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
825  SmallVectorImpl<SUnit*> &BotRoots) {
826  for (SUnit &SU : SUnits) {
827  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
828 
829  // Order predecessors so DFSResult follows the critical path.
830  SU.biasCriticalPath();
831 
832  // A SUnit is ready to top schedule if it has no predecessors.
833  if (!SU.NumPredsLeft)
834  TopRoots.push_back(&SU);
835  // A SUnit is ready to bottom schedule if it has no successors.
836  if (!SU.NumSuccsLeft)
837  BotRoots.push_back(&SU);
838  }
839  ExitSU.biasCriticalPath();
840 }
841 
842 /// Identify DAG roots and setup scheduler queues.
843 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
844  ArrayRef<SUnit*> BotRoots) {
845  NextClusterSucc = nullptr;
846  NextClusterPred = nullptr;
847 
848  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
849  //
850  // Nodes with unreleased weak edges can still be roots.
851  // Release top roots in forward order.
852  for (SUnit *SU : TopRoots)
853  SchedImpl->releaseTopNode(SU);
854 
855  // Release bottom roots in reverse order so the higher priority nodes appear
856  // first. This is more natural and slightly more efficient.
857  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
858  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
859  SchedImpl->releaseBottomNode(*I);
860  }
861 
862  releaseSuccessors(&EntrySU);
863  releasePredecessors(&ExitSU);
864 
865  SchedImpl->registerRoots();
866 
867  // Advance past initial DebugValues.
868  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
869  CurrentBottom = RegionEnd;
870 }
871 
872 /// Update scheduler queues after scheduling an instruction.
873 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
874  // Release dependent instructions for scheduling.
875  if (IsTopNode)
876  releaseSuccessors(SU);
877  else
878  releasePredecessors(SU);
879 
880  SU->isScheduled = true;
881 }
882 
883 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
884 void ScheduleDAGMI::placeDebugValues() {
885  // If first instruction was a DBG_VALUE then put it back.
886  if (FirstDbgValue) {
887  BB->splice(RegionBegin, BB, FirstDbgValue);
888  RegionBegin = FirstDbgValue;
889  }
890 
891  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
892  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
893  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
894  MachineInstr *DbgValue = P.first;
895  MachineBasicBlock::iterator OrigPrevMI = P.second;
896  if (&*RegionBegin == DbgValue)
897  ++RegionBegin;
898  BB->splice(++OrigPrevMI, BB, DbgValue);
899  if (OrigPrevMI == std::prev(RegionEnd))
900  RegionEnd = DbgValue;
901  }
902  DbgValues.clear();
903  FirstDbgValue = nullptr;
904 }
905 
906 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
907 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
908  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
909  if (SUnit *SU = getSUnit(&(*MI)))
910  dumpNode(*SU);
911  else
912  dbgs() << "Missing SUnit\n";
913  }
914 }
915 #endif
916 
917 //===----------------------------------------------------------------------===//
918 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
919 // preservation.
920 //===----------------------------------------------------------------------===//
921 
922 ScheduleDAGMILive::~ScheduleDAGMILive() {
923  delete DFSResult;
924 }
925 
926 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
927  const MachineInstr &MI = *SU.getInstr();
928  for (const MachineOperand &MO : MI.operands()) {
929  if (!MO.isReg())
930  continue;
931  if (!MO.readsReg())
932  continue;
933  if (TrackLaneMasks && !MO.isUse())
934  continue;
935 
936  unsigned Reg = MO.getReg();
937  if (!TargetRegisterInfo::isVirtualRegister(Reg))
938  continue;
939 
940  // Ignore re-defs.
941  if (TrackLaneMasks) {
942  bool FoundDef = false;
943  for (const MachineOperand &MO2 : MI.operands()) {
944  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
945  FoundDef = true;
946  break;
947  }
948  }
949  if (FoundDef)
950  continue;
951  }
952 
953  // Record this local VReg use.
954  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
955  for (; UI != VRegUses.end(); ++UI) {
956  if (UI->SU == &SU)
957  break;
958  }
959  if (UI == VRegUses.end())
960  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
961  }
962 }
963 
964 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
965 /// crossing a scheduling boundary. [begin, end) includes all instructions in
966 /// the region, including the boundary itself and single-instruction regions
967 /// that don't get scheduled.
968 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
969  MachineBasicBlock::iterator begin,
970  MachineBasicBlock::iterator end,
971  unsigned regioninstrs)
972 {
973  // ScheduleDAGMI initializes SchedImpl's per-region policy.
974  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
975 
976  // For convenience remember the end of the liveness region.
977  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
978 
979  SUPressureDiffs.clear();
980 
981  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
982  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
983 
984  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
985  "ShouldTrackLaneMasks requires ShouldTrackPressure");
986 }
987 
988 // Setup the register pressure trackers for the top-scheduled and
989 // bottom-scheduled regions.
990 void ScheduleDAGMILive::initRegPressure() {
991  VRegUses.clear();
992  VRegUses.setUniverse(MRI.getNumVirtRegs());
993  for (SUnit &SU : SUnits)
994  collectVRegUses(SU);
995 
996  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
997  ShouldTrackLaneMasks, false);
998  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
999  ShouldTrackLaneMasks, false);
1000 
1001  // Close the RPTracker to finalize live ins.
1002  RPTracker.closeRegion();
1003 
1004  LLVM_DEBUG(RPTracker.dump());
1005 
1006  // Initialize the live ins and live outs.
1007  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1008  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1009 
1010  // Close one end of the tracker so we can call
1011  // getMaxUpward/DownwardPressureDelta before advancing across any
1012  // instructions. This converts currently live regs into live ins/outs.
1013  TopRPTracker.closeTop();
1014  BotRPTracker.closeBottom();
1015 
1016  BotRPTracker.initLiveThru(RPTracker);
1017  if (!BotRPTracker.getLiveThru().empty()) {
1018  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1019  LLVM_DEBUG(dbgs() << "Live Thru: ";
1020  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1021  };
1022 
1023  // For each live out vreg reduce the pressure change associated with other
1024  // uses of the same vreg below the live-out reaching def.
1025  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1026 
1027  // Account for liveness generated by the region boundary.
1028  if (LiveRegionEnd != RegionEnd) {
1029  SmallVector<RegisterMaskPair, 8> LiveUses;
1030  BotRPTracker.recede(&LiveUses);
1031  updatePressureDiffs(LiveUses);
1032  }
1033 
1034  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1035  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1036  dbgs() << "Bottom Pressure:\n";
1037  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1038 
1039  assert((BotRPTracker.getPos() == RegionEnd ||
1040  (RegionEnd->isDebugInstr() &&
1041  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1042  "Can't find the region bottom");
1043 
1044  // Cache the list of excess pressure sets in this region. This will also track
1045  // the max pressure in the scheduled code for these sets.
1046  RegionCriticalPSets.clear();
1047  const std::vector<unsigned> &RegionPressure =
1048  RPTracker.getPressure().MaxSetPressure;
1049  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1050  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1051  if (RegionPressure[i] > Limit) {
1052  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1053  << " Actual " << RegionPressure[i] << "\n");
1054  RegionCriticalPSets.push_back(PressureChange(i));
1055  }
1056  }
1057  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1058  for (const PressureChange &RCPS
1059  : RegionCriticalPSets) dbgs()
1060  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1061  dbgs() << "\n");
1062 }
1063 
1065 void ScheduleDAGMILive::updateScheduledPressure(const SUnit *SU,
1066  const std::vector<unsigned> &NewMaxPressure) {
1067  const PressureDiff &PDiff = getPressureDiff(SU);
1068  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1069  for (const PressureChange &PC : PDiff) {
1070  if (!PC.isValid())
1071  break;
1072  unsigned ID = PC.getPSet();
1073  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1074  ++CritIdx;
1075  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1076  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1077  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1078  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1079  }
1080  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1081  if (NewMaxPressure[ID] >= Limit - 2) {
1082  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1083  << NewMaxPressure[ID]
1084  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1085  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1086  << " livethru)\n");
1087  }
1088  }
1089 }
1090 
1091 /// Update the PressureDiff array for liveness after scheduling this
1092 /// instruction.
1093 void ScheduleDAGMILive::updatePressureDiffs(
1094  ArrayRef<RegisterMaskPair> LiveUses) {
1095  for (const RegisterMaskPair &P : LiveUses) {
1096  unsigned Reg = P.RegUnit;
1097  /// FIXME: Currently assuming single-use physregs.
1098  if (!TRI->isVirtualRegister(Reg))
1099  continue;
1100 
1101  if (ShouldTrackLaneMasks) {
1102  // If the register has just become live then other uses won't change
1103  // this fact anymore => decrement pressure.
1104  // If the register has just become dead then other uses make it come
1105  // back to life => increment pressure.
1106  bool Decrement = P.LaneMask.any();
1107 
1108  for (const VReg2SUnit &V2SU
1109  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1110  SUnit &SU = *V2SU.SU;
1111  if (SU.isScheduled || &SU == &ExitSU)
1112  continue;
1113 
1114  PressureDiff &PDiff = getPressureDiff(&SU);
1115  PDiff.addPressureChange(Reg, Decrement, &MRI);
1116  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1117  << printReg(Reg, TRI) << ':'
1118  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1119  dbgs() << " to "; PDiff.dump(*TRI););
1120  }
1121  } else {
1122  assert(P.LaneMask.any());
1123  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1124  // This may be called before CurrentBottom has been initialized. However,
1125  // BotRPTracker must have a valid position. We want the value live into the
1126  // instruction or live out of the block, so ask for the previous
1127  // instruction's live-out.
1128  const LiveInterval &LI = LIS->getInterval(Reg);
1129  VNInfo *VNI;
1130  MachineBasicBlock::const_iterator I =
1131  nextIfDebug(BotRPTracker.getPos(), BB->end());
1132  if (I == BB->end())
1133  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1134  else {
1135  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1136  VNI = LRQ.valueIn();
1137  }
1138  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1139  assert(VNI && "No live value at use.");
1140  for (const VReg2SUnit &V2SU
1141  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1142  SUnit *SU = V2SU.SU;
1143  // If this use comes before the reaching def, it cannot be a last use,
1144  // so decrease its pressure change.
1145  if (!SU->isScheduled && SU != &ExitSU) {
1146  LiveQueryResult LRQ =
1147  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1148  if (LRQ.valueIn() == VNI) {
1149  PressureDiff &PDiff = getPressureDiff(SU);
1150  PDiff.addPressureChange(Reg, true, &MRI);
1151  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1152  << *SU->getInstr();
1153  dbgs() << " to "; PDiff.dump(*TRI););
1154  }
1155  }
1156  }
1157  }
1158  }
1159 }
1160 
1161 void ScheduleDAGMILive::dump() const {
1162 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1163  if (EntrySU.getInstr() != nullptr)
1164  dumpNodeAll(EntrySU);
1165  for (const SUnit &SU : SUnits) {
1166  dumpNodeAll(SU);
1167  if (ShouldTrackPressure) {
1168  dbgs() << " Pressure Diff : ";
1169  getPressureDiff(&SU).dump(*TRI);
1170  }
1171  dbgs() << " Single Issue : ";
1172  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1173  SchedModel.mustEndGroup(SU.getInstr()))
1174  dbgs() << "true;";
1175  else
1176  dbgs() << "false;";
1177  dbgs() << '\n';
1178  }
1179  if (ExitSU.getInstr() != nullptr)
1180  dumpNodeAll(ExitSU);
1181 #endif
1182 }
1183 
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196  LLVM_DEBUG(SchedImpl->dumpPolicy());
1197  buildDAGWithRegPressure();
1198 
1199  postprocessDAG();
1200 
1201  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202  findRootsAndBiasEdges(TopRoots, BotRoots);
1203 
1204  // Initialize the strategy before modifying the DAG.
1205  // This may initialize a DFSResult to be used for queue priority.
1206  SchedImpl->initialize(this);
1207 
1208  LLVM_DEBUG(dump());
1209  if (PrintDAGs) dump();
1210  if (ViewMISchedDAGs) viewGraph();
1211 
1212  // Initialize ready queues now that the DAG and priority data are finalized.
1213  initQueues(TopRoots, BotRoots);
1214 
1215  bool IsTopNode = false;
1216  while (true) {
1217  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1218  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1219  if (!SU) break;
1220 
1221  assert(!SU->isScheduled && "Node already scheduled");
1222  if (!checkSchedLimit())
1223  break;
1224 
1225  scheduleMI(SU, IsTopNode);
1226 
1227  if (DFSResult) {
1228  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1229  if (!ScheduledTrees.test(SubtreeID)) {
1230  ScheduledTrees.set(SubtreeID);
1231  DFSResult->scheduleTree(SubtreeID);
1232  SchedImpl->scheduleTree(SubtreeID);
1233  }
1234  }
1235 
1236  // Notify the scheduling strategy after updating the DAG.
1237  SchedImpl->schedNode(SU, IsTopNode);
1238 
1239  updateQueues(SU, IsTopNode);
1240  }
1241  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1242 
1243  placeDebugValues();
1244 
1245  LLVM_DEBUG({
1246  dbgs() << "*** Final schedule for "
1247  << printMBBReference(*begin()->getParent()) << " ***\n";
1248  dumpSchedule();
1249  dbgs() << '\n';
1250  });
1251 }
1252 
1253 /// Build the DAG and setup three register pressure trackers.
1254 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1255  if (!ShouldTrackPressure) {
1256  RPTracker.reset();
1257  RegionCriticalPSets.clear();
1258  buildSchedGraph(AA);
1259  return;
1260  }
1261 
1262  // Initialize the register pressure tracker used by buildSchedGraph.
1263  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1264  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1265 
1266  // Account for liveness generated by the region boundary.
1267  if (LiveRegionEnd != RegionEnd)
1268  RPTracker.recede();
1269 
1270  // Build the DAG, and compute current register pressure.
1271  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1272 
1273  // Initialize top/bottom trackers after computing region pressure.
1274  initRegPressure();
1275 }
1276 
1277 void ScheduleDAGMILive::computeDFSResult() {
1278  if (!DFSResult)
1279  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1280  DFSResult->clear();
1281  ScheduledTrees.clear();
1282  DFSResult->resize(SUnits.size());
1283  DFSResult->compute(SUnits);
1284  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1285 }
1286 
1287 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1288 /// only provides the critical path for single block loops. To handle loops that
1289 /// span blocks, we could use the vreg path latencies provided by
1290 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1291 /// available for use in the scheduler.
1292 ///
1293 /// The cyclic path estimation identifies a def-use pair that crosses the back
1294 /// edge and considers the depth and height of the nodes. For example, consider
1295 /// the following instruction sequence where each instruction has unit latency
1296 /// and defines an eponymous virtual register:
1297 ///
1298 /// a->b(a,c)->c(b)->d(c)->exit
1299 ///
1300 /// The cyclic critical path is two cycles: b->c->b
1301 /// The acyclic critical path is four cycles: a->b->c->d->exit
1302 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1303 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1304 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1305 /// LiveInDepth = depth(b) = len(a->b) = 1
1306 ///
1307 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1308 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1309 /// CyclicCriticalPath = min(2, 2) = 2
1310 ///
1311 /// This could be relevant to PostRA scheduling, but is currently implemented
1312 /// assuming LiveIntervals.
1313 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1314  // This only applies to a single-block loop.
1315  if (!BB->isSuccessor(BB))
1316  return 0;
1317 
1318  unsigned MaxCyclicLatency = 0;
1319  // Visit each live out vreg def to find def/use pairs that cross iterations.
1320  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1321  unsigned Reg = P.RegUnit;
1322  if (!TRI->isVirtualRegister(Reg))
1323  continue;
1324  const LiveInterval &LI = LIS->getInterval(Reg);
1325  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1326  if (!DefVNI)
1327  continue;
1328 
1329  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1330  const SUnit *DefSU = getSUnit(DefMI);
1331  if (!DefSU)
1332  continue;
1333 
1334  unsigned LiveOutHeight = DefSU->getHeight();
1335  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1336  // Visit all local users of the vreg def.
1337  for (const VReg2SUnit &V2SU
1338  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1339  SUnit *SU = V2SU.SU;
1340  if (SU == &ExitSU)
1341  continue;
1342 
1343  // Only consider uses of the phi.
1344  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1345  if (!LRQ.valueIn()->isPHIDef())
1346  continue;
1347 
1348  // Assume that a path spanning two iterations is a cycle, which could
1349  // overestimate in strange cases. This allows cyclic latency to be
1350  // estimated as the minimum slack of the vreg's depth or height.
1351  unsigned CyclicLatency = 0;
1352  if (LiveOutDepth > SU->getDepth())
1353  CyclicLatency = LiveOutDepth - SU->getDepth();
1354 
1355  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1356  if (LiveInHeight > LiveOutHeight) {
1357  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1358  CyclicLatency = LiveInHeight - LiveOutHeight;
1359  } else
1360  CyclicLatency = 0;
1361 
1362  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1363  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1364  if (CyclicLatency > MaxCyclicLatency)
1365  MaxCyclicLatency = CyclicLatency;
1366  }
1367  }
1368  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1369  return MaxCyclicLatency;
1370 }
1371 
1372 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1373 /// the Top RP tracker in case the region beginning has changed.
1374 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1375  ArrayRef<SUnit*> BotRoots) {
1376  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1377  if (ShouldTrackPressure) {
1378  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1379  TopRPTracker.setPos(CurrentTop);
1380  }
1381 }
1382 
1383 /// Move an instruction and update register pressure.
1384 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1385  // Move the instruction to its new location in the instruction stream.
1386  MachineInstr *MI = SU->getInstr();
1387 
1388  if (IsTopNode) {
1389  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1390  if (&*CurrentTop == MI)
1391  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1392  else {
1393  moveInstruction(MI, CurrentTop);
1394  TopRPTracker.setPos(MI);
1395  }
1396 
1397  if (ShouldTrackPressure) {
1398  // Update top scheduled pressure.
1399  RegisterOperands RegOpers;
1400  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1401  if (ShouldTrackLaneMasks) {
1402  // Adjust liveness and add missing dead+read-undef flags.
1403  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1404  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1405  } else {
1406  // Adjust for missing dead-def flags.
1407  RegOpers.detectDeadDefs(*MI, *LIS);
1408  }
1409 
1410  TopRPTracker.advance(RegOpers);
1411  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1412  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1413  TopRPTracker.getRegSetPressureAtPos(), TRI););
1414 
1415  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1416  }
1417  } else {
1418  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1419  MachineBasicBlock::iterator priorII =
1420  priorNonDebug(CurrentBottom, CurrentTop);
1421  if (&*priorII == MI)
1422  CurrentBottom = priorII;
1423  else {
1424  if (&*CurrentTop == MI) {
1425  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1426  TopRPTracker.setPos(CurrentTop);
1427  }
1428  moveInstruction(MI, CurrentBottom);
1429  CurrentBottom = MI;
1430  BotRPTracker.setPos(CurrentBottom);
1431  }
1432  if (ShouldTrackPressure) {
1433  RegisterOperands RegOpers;
1434  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1435  if (ShouldTrackLaneMasks) {
1436  // Adjust liveness and add missing dead+read-undef flags.
1437  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1438  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1439  } else {
1440  // Adjust for missing dead-def flags.
1441  RegOpers.detectDeadDefs(*MI, *LIS);
1442  }
1443 
1444  if (BotRPTracker.getPos() != CurrentBottom)
1445  BotRPTracker.recedeSkipDebugValues();
1446  SmallVector<RegisterMaskPair, 8> LiveUses;
1447  BotRPTracker.recede(RegOpers, &LiveUses);
1448  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1449  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1450  BotRPTracker.getRegSetPressureAtPos(), TRI););
1451 
1452  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1453  updatePressureDiffs(LiveUses);
1454  }
1455  }
1456 }
1457 
1458 //===----------------------------------------------------------------------===//
1459 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1460 //===----------------------------------------------------------------------===//
1461 
1462 namespace {
1463 
1464 /// Post-process the DAG to create cluster edges between neighboring
1465 /// loads or between neighboring stores.
1466 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1467  struct MemOpInfo {
1468  SUnit *SU;
1469  const MachineOperand *BaseOp;
1470  int64_t Offset;
1471 
1472  MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1473  : SU(su), BaseOp(Op), Offset(ofs) {}
1474 
1475  bool operator<(const MemOpInfo &RHS) const {
1476  if (BaseOp->getType() != RHS.BaseOp->getType())
1477  return BaseOp->getType() < RHS.BaseOp->getType();
1478 
1479  if (BaseOp->isReg())
1480  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1481  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1482  RHS.SU->NodeNum);
1483  if (BaseOp->isFI()) {
1484  const MachineFunction &MF =
1485  *BaseOp->getParent()->getParent()->getParent();
1486  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1487  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1488  TargetFrameLowering::StackGrowsDown;
1489  // Can't use tuple comparison here since we might need to use a
1490  // different order when the stack grows down.
1491  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1492  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1493  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1494 
1495  if (Offset != RHS.Offset)
1496  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1497 
1498  return SU->NodeNum < RHS.SU->NodeNum;
1499  }
1500 
1501  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1502  "index bases.");
1503  }
1504  };
1505 
1506  const TargetInstrInfo *TII;
1507  const TargetRegisterInfo *TRI;
1508  bool IsLoad;
1509 
1510 public:
1511  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1512  const TargetRegisterInfo *tri, bool IsLoad)
1513  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1514 
1515  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1516 
1517 protected:
1518  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1519 };
1520 
1521 class StoreClusterMutation : public BaseMemOpClusterMutation {
1522 public:
1523  StoreClusterMutation(const TargetInstrInfo *tii,
1524  const TargetRegisterInfo *tri)
1525  : BaseMemOpClusterMutation(tii, tri, false) {}
1526 };
1527 
1528 class LoadClusterMutation : public BaseMemOpClusterMutation {
1529 public:
1530  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1531  : BaseMemOpClusterMutation(tii, tri, true) {}
1532 };
1533 
1534 } // end anonymous namespace
1535 
1536 namespace llvm {
1537 
1538 std::unique_ptr<ScheduleDAGMutation>
1539 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1540  const TargetRegisterInfo *TRI) {
1541  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1542  : nullptr;
1543 }
1544 
1545 std::unique_ptr<ScheduleDAGMutation>
1546 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1547  const TargetRegisterInfo *TRI) {
1548  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1549  : nullptr;
1550 }
1551 
1552 } // end namespace llvm
1553 
1554 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1555  ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1556  SmallVector<MemOpInfo, 32> MemOpRecords;
1557  for (SUnit *SU : MemOps) {
1558  const MachineOperand *BaseOp;
1559  int64_t Offset;
1560  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1561  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1562  }
1563  if (MemOpRecords.size() < 2)
1564  return;
1565 
1566  llvm::sort(MemOpRecords);
1567  unsigned ClusterLength = 1;
1568  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1569  SUnit *SUa = MemOpRecords[Idx].SU;
1570  SUnit *SUb = MemOpRecords[Idx+1].SU;
1571  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1572  *MemOpRecords[Idx + 1].BaseOp,
1573  ClusterLength) &&
1574  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1575  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1576  << SUb->NodeNum << ")\n");
1577  // Copy successor edges from SUa to SUb. Interleaving computation
1578  // dependent on SUa can prevent load combining due to register reuse.
1579  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1580  // loads should have effectively the same inputs.
1581  for (const SDep &Succ : SUa->Succs) {
1582  if (Succ.getSUnit() == SUb)
1583  continue;
1584  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1585  << ")\n");
1586  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1587  }
1588  ++ClusterLength;
1589  } else
1590  ClusterLength = 1;
1591  }
1592 }
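// Editor's note (not part of the original file): a small worked example, with
// assumed AArch64-like loads, of the clustering above. Given
//
//   SU(0): %v0 = LDRWui %x1, 0
//   SU(1): %v1 = LDRWui %x1, 1
//
// both records share base register %x1, so after sorting they are adjacent and
// shouldClusterMemOps() is asked whether a cluster of length 2 is profitable.
// If it is, a weak Cluster edge SU(0)->SU(1) is added, and SU(0)'s other
// successors are also made successors of SU(1) via artificial edges, so
// unrelated computation is not interleaved between the two loads.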
1593 
1594 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1595 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1596  // Map DAG NodeNum to store chain ID.
1597  DenseMap<unsigned, unsigned> StoreChainIDs;
1598  // Map each store chain to a set of dependent MemOps.
1599  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1600  for (SUnit &SU : DAG->SUnits) {
1601  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1602  (!IsLoad && !SU.getInstr()->mayStore()))
1603  continue;
1604 
1605  unsigned ChainPredID = DAG->SUnits.size();
1606  for (const SDep &Pred : SU.Preds) {
1607  if (Pred.isCtrl()) {
1608  ChainPredID = Pred.getSUnit()->NodeNum;
1609  break;
1610  }
1611  }
1612  // Check if this chain-like pred has been seen
1613  // before. ChainPredID==MaxNodeID at the top of the schedule.
1614  unsigned NumChains = StoreChainDependents.size();
1615  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1616  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1617  if (Result.second)
1618  StoreChainDependents.resize(NumChains + 1);
1619  StoreChainDependents[Result.first->second].push_back(&SU);
1620  }
1621 
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChainDependents)
1624  clusterNeighboringMemOps(SCD, DAG);
1625 }
1626 
1627 //===----------------------------------------------------------------------===//
1628 // CopyConstrain - DAG post-processing to encourage copy elimination.
1629 //===----------------------------------------------------------------------===//
1630 
1631 namespace {
1632 
1633 /// Post-process the DAG to create weak edges from all uses of a copy to
1634 /// the one use that defines the copy's source vreg, most likely an induction
1635 /// variable increment.
1636 class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639 
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643 
1644 public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646 
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648 
1649 protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651 };
1652 
1653 } // end anonymous namespace
1654 
1655 namespace llvm {
1656 
1657 std::unique_ptr<ScheduleDAGMutation>
1658 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659  const TargetRegisterInfo *TRI) {
1660  return llvm::make_unique<CopyConstrain>(TII, TRI);
1661 }
1662 
1663 } // end namespace llvm
1664 
1665 /// constrainLocalCopy handles two possibilities:
1666 /// 1) Local src:
1667 /// I0: = dst
1668 /// I1: src = ...
1669 /// I2: = dst
1670 /// I3: dst = src (copy)
1671 /// (create pred->succ edges I0->I1, I2->I1)
1672 ///
1673 /// 2) Local copy:
1674 /// I0: dst = src (copy)
1675 /// I1: = dst
1676 /// I2: src = ...
1677 /// I3: = dst
1678 /// (create pred->succ edges I1->I2, I3->I2)
1679 ///
1680 /// Although the MachineScheduler is currently constrained to single blocks,
1681 /// this algorithm should handle extended blocks. An EBB is a set of
1682 /// contiguously numbered blocks such that the previous block in the EBB is
1683 /// always the single predecessor.
1684 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687 
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  unsigned SrcReg = SrcOp.getReg();
1691  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692  return;
1693 
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  unsigned DstReg = DstOp.getReg();
1696  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1697  return;
1698 
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709  LocalReg = DstReg;
1710  GlobalReg = SrcReg;
1711  LocalLI = &LIS->getInterval(LocalReg);
1712  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713  return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716 
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with them.
1723  if (GlobalSegment == GlobalLI->end())
1724  return;
1725 
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731  ++GlobalSegment;
1732 
1733  if (GlobalSegment == GlobalLI->end())
1734  return;
1735 
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738  // Two address defs have no hole.
1739  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740  GlobalSegment->start)) {
1741  return;
1742  }
1743  // If the prior global segment may be defined by the same two-address
1744  // instruction that also defines LocalLI, then can't make a hole here.
1745  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746  LocalLI->beginIndex())) {
1747  return;
1748  }
1749  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750  // it would be a disconnected component in the live range.
1751  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752  "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756  return;
1757 
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760  return;
1761 
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770  continue;
1771  if (Succ.getSUnit() == GlobalSU)
1772  continue;
1773  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774  return;
1775  LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785  continue;
1786  if (Pred.getSUnit() == FirstLocalSU)
1787  continue;
1788  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789  return;
1790  GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1797  << GlobalSU->NodeNum << ")\n");
1798  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1803  << FirstLocalSU->NodeNum << ")\n");
1804  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806 }
1807 
1808 /// Callback from DAG postProcessing to create weak edges to encourage
1809 /// copy elimination.
1810 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813 
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816  return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819  *priorNonDebug(DAG->end(), DAG->begin()));
1820 
1821  for (SUnit &SU : DAG->SUnits) {
1822  if (!SU.getInstr()->isCopy())
1823  continue;
1824 
1825  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831 // and possibly other custom schedulers.
1832 //===----------------------------------------------------------------------===//
1833 
1834 static const unsigned InvalidCycle = ~0U;
1835 
1836 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837 
1838 /// Given a Count of resource usage and a Latency value, return true if a
1839 /// SchedBoundary becomes resource limited.
1840 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1841  unsigned Latency) {
1842  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1843 }
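// Illustrative example of the check above (made-up numbers, no particular
// target implied): for LFactor == 2, Latency == 10 and Count == 25 scaled
// units, 25 - (10 * 2) == 5 > 2, so the boundary counts as resource limited;
// with Count == 21 it does not, since 21 - 20 == 1 <= 2.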
1844 
1845 void SchedBoundary::reset() {
1846  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1847  // Destroying and reconstructing it is very expensive though. So keep
1848  // invalid, placeholder HazardRecs.
1849  if (HazardRec && HazardRec->isEnabled()) {
1850  delete HazardRec;
1851  HazardRec = nullptr;
1852  }
1853  Available.clear();
1854  Pending.clear();
1855  CheckPending = false;
1856  CurrCycle = 0;
1857  CurrMOps = 0;
1858  MinReadyCycle = std::numeric_limits<unsigned>::max();
1859  ExpectedLatency = 0;
1860  DependentLatency = 0;
1861  RetiredMOps = 0;
1862  MaxExecutedResCount = 0;
1863  ZoneCritResIdx = 0;
1864  IsResourceLimited = false;
1865  ReservedCycles.clear();
1866 #ifndef NDEBUG
1867  // Track the maximum number of stall cycles that could arise either from the
1868  // latency of a DAG edge or the number of cycles that a processor resource is
1869  // reserved (SchedBoundary::ReservedCycles).
1870  MaxObservedStall = 0;
1871 #endif
1872  // Reserve a zero-count for invalid CritResIdx.
1873  ExecutedResCounts.resize(1);
1874  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1875 }
1876 
1877 void SchedRemainder::
1878 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1879  reset();
1880  if (!SchedModel->hasInstrSchedModel())
1881  return;
1882  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1883  for (SUnit &SU : DAG->SUnits) {
1884  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1885  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1886  * SchedModel->getMicroOpFactor();
1887  for (TargetSchedModel::ProcResIter
1888  PI = SchedModel->getWriteProcResBegin(SC),
1889  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1890  unsigned PIdx = PI->ProcResourceIdx;
1891  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1892  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1893  }
1894  }
1895 }
1896 
1897 void SchedBoundary::
1898 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1899  reset();
1900  DAG = dag;
1901  SchedModel = smodel;
1902  Rem = rem;
1903  if (SchedModel->hasInstrSchedModel()) {
1904  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1905  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1906  }
1907 }
1908 
1909 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1910 /// these "soft stalls" differently than the hard stall cycles based on CPU
1911 /// resources and computed by checkHazard(). A fully in-order model
1912 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1913 /// available for scheduling until they are ready. However, a weaker in-order
1914 /// model may use this for heuristics. For example, if a processor has in-order
1915 /// behavior when reading certain resources, this may come into play.
1916 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1917  if (!SU->isUnbuffered)
1918  return 0;
1919 
1920  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1921  if (ReadyCycle > CurrCycle)
1922  return ReadyCycle - CurrCycle;
1923  return 0;
1924 }
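// Illustrative example (hypothetical values): an unbuffered SU whose
// TopReadyCycle is 7 while the top zone is at CurrCycle 5 reports 2 soft
// stall cycles; a buffered SU, or one that is already ready, reports 0.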
1925 
1926 /// Compute the next cycle at which the given processor resource can be
1927 /// scheduled.
1928 unsigned SchedBoundary::
1929 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1930  unsigned NextUnreserved = ReservedCycles[PIdx];
1931  // If this resource has never been used, always return cycle zero.
1932  if (NextUnreserved == InvalidCycle)
1933  return 0;
1934  // For bottom-up scheduling add the cycles needed for the current operation.
1935  if (!isTop())
1936  NextUnreserved += Cycles;
1937  return NextUnreserved;
1938 }
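// Illustrative example (hypothetical values): if ReservedCycles[PIdx] == 12,
// a top-down query returns 12, while a bottom-up query for a 3-cycle use
// returns 15; a resource still marked InvalidCycle always returns cycle 0.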
1939 
1940 /// Does this SU have a hazard within the current instruction group.
1941 ///
1942 /// The scheduler supports two modes of hazard recognition. The first is the
1943 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1944 /// supports highly complicated in-order reservation tables
1945 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1946 ///
1947 /// The second is a streamlined mechanism that checks for hazards based on
1948 /// simple counters that the scheduler itself maintains. It explicitly checks
1949 /// for instruction dispatch limitations, including the number of micro-ops that
1950 /// can dispatch per cycle.
1951 ///
1952 /// TODO: Also check whether the SU must start a new group.
1953 bool SchedBoundary::checkHazard(SUnit *SU) {
1954  if (HazardRec->isEnabled()
1955  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1956  return true;
1957  }
1958 
1959  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1960  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1961  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1962  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1963  return true;
1964  }
1965 
1966  if (CurrMOps > 0 &&
1967  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1968  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1969  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1970  << (isTop() ? "begin" : "end") << " group\n");
1971  return true;
1972  }
1973 
1974  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1975  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1976  for (const MCWriteProcResEntry &PE :
1977  make_range(SchedModel->getWriteProcResBegin(SC),
1978  SchedModel->getWriteProcResEnd(SC))) {
1979  unsigned ResIdx = PE.ProcResourceIdx;
1980  unsigned Cycles = PE.Cycles;
1981  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1982  if (NRCycle > CurrCycle) {
1983 #ifndef NDEBUG
1984  MaxObservedStall = std::max(Cycles, MaxObservedStall);
1985 #endif
1986  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1987  << SchedModel->getResourceName(ResIdx) << "="
1988  << NRCycle << "c\n");
1989  return true;
1990  }
1991  }
1992  }
1993  return false;
1994 }
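// Illustrative example of the micro-op check above (made-up numbers): with
// an issue width of 4 and CurrMOps == 3, an SU needing 2 micro-ops is a
// hazard because 3 + 2 > 4; the same SU issues cleanly when CurrMOps == 0.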
1995 
1996 // Find the unscheduled node in ReadySUs with the highest latency.
1997 unsigned SchedBoundary::
1998 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1999  SUnit *LateSU = nullptr;
2000  unsigned RemLatency = 0;
2001  for (SUnit *SU : ReadySUs) {
2002  unsigned L = getUnscheduledLatency(SU);
2003  if (L > RemLatency) {
2004  RemLatency = L;
2005  LateSU = SU;
2006  }
2007  }
2008  if (LateSU) {
2009  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2010  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2011  }
2012  return RemLatency;
2013 }
2014 
2015 // Count resources in this zone and the remaining unscheduled
2016 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2017 // resource index, or zero if the zone is issue limited.
2018 unsigned SchedBoundary::
2019 getOtherResourceCount(unsigned &OtherCritIdx) {
2020  OtherCritIdx = 0;
2021  if (!SchedModel->hasInstrSchedModel())
2022  return 0;
2023 
2024  unsigned OtherCritCount = Rem->RemIssueCount
2025  + (RetiredMOps * SchedModel->getMicroOpFactor());
2026  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2027  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2028  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2029  PIdx != PEnd; ++PIdx) {
2030  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2031  if (OtherCount > OtherCritCount) {
2032  OtherCritCount = OtherCount;
2033  OtherCritIdx = PIdx;
2034  }
2035  }
2036  if (OtherCritIdx) {
2037  LLVM_DEBUG(
2038  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2039  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2040  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2041  }
2042  return OtherCritCount;
2043 }
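// Worked example (illustrative values): with MicroOpFactor == 1,
// RemIssueCount == 40 and RetiredMOps == 10, the issue baseline is 50; a
// resource kind with 30 scaled units already executed and 25 remaining
// totals 55 > 50, so it becomes OtherCritIdx and 55 is returned.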
2044 
2045 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2046  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2047 
2048 #ifndef NDEBUG
2049  // ReadyCycle has been bumped up to the CurrCycle when this node was
2050  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2051  // scheduling, so may now be greater than ReadyCycle.
2052  if (ReadyCycle > CurrCycle)
2053  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2054 #endif
2055 
2056  if (ReadyCycle < MinReadyCycle)
2057  MinReadyCycle = ReadyCycle;
2058 
2059  // Check for interlocks first. For the purpose of other heuristics, an
2060  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2061  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2062  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2063  Available.size() >= ReadyListLimit)
2064  Pending.push(SU);
2065  else
2066  Available.push(SU);
2067 }
2068 
2069 /// Move the boundary of scheduled code by one cycle.
2070 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2071  if (SchedModel->getMicroOpBufferSize() == 0) {
2072  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2073  "MinReadyCycle uninitialized");
2074  if (MinReadyCycle > NextCycle)
2075  NextCycle = MinReadyCycle;
2076  }
2077  // Update the current micro-ops, which will issue in the next cycle.
2078  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2079  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2080 
2081  // Decrement DependentLatency based on the next cycle.
2082  if ((NextCycle - CurrCycle) > DependentLatency)
2083  DependentLatency = 0;
2084  else
2085  DependentLatency -= (NextCycle - CurrCycle);
2086 
2087  if (!HazardRec->isEnabled()) {
2088  // Bypass HazardRec virtual calls.
2089  CurrCycle = NextCycle;
2090  } else {
2091  // Bypass getHazardType calls in case of long latency.
2092  for (; CurrCycle != NextCycle; ++CurrCycle) {
2093  if (isTop())
2094  HazardRec->AdvanceCycle();
2095  else
2096  HazardRec->RecedeCycle();
2097  }
2098  }
2099  CheckPending = true;
2100  IsResourceLimited =
2101  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2102  getScheduledLatency());
2103 
2104  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2105  << '\n');
2106 }
2107 
2108 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2109  ExecutedResCounts[PIdx] += Count;
2110  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2111  MaxExecutedResCount = ExecutedResCounts[PIdx];
2112 }
2113 
2114 /// Add the given processor resource to this scheduled zone.
2115 ///
2116 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2117 /// during which this resource is consumed.
2118 ///
2119 /// \return the next cycle at which the instruction may execute without
2120 /// oversubscribing resources.
2121 unsigned SchedBoundary::
2122 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2123  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2124  unsigned Count = Factor * Cycles;
2125  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2126  << Cycles << "x" << Factor << "u\n");
2127 
2128  // Update Executed resources counts.
2129  incExecutedResources(PIdx, Count);
2130  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2131  Rem->RemainingCounts[PIdx] -= Count;
2132 
2133  // Check if this resource exceeds the current critical resource. If so, it
2134  // becomes the critical resource.
2135  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2136  ZoneCritResIdx = PIdx;
2137  LLVM_DEBUG(dbgs() << " *** Critical resource "
2138  << SchedModel->getResourceName(PIdx) << ": "
2139  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2140  << "c\n");
2141  }
2142  // For reserved resources, record the highest cycle using the resource.
2143  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2144  if (NextAvailable > CurrCycle) {
2145  LLVM_DEBUG(dbgs() << " Resource conflict: "
2146  << SchedModel->getProcResource(PIdx)->Name
2147  << " reserved until @" << NextAvailable << "\n");
2148  }
2149  return NextAvailable;
2150 }
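// Worked example (illustrative values): a resource with Factor == 3 used for
// Cycles == 2 consumes Count == 6 scaled units, which is added to
// ExecutedResCounts[PIdx] and subtracted from Rem->RemainingCounts[PIdx].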
2151 
2152 /// Move the boundary of scheduled code by one SUnit.
2153 void SchedBoundary::bumpNode(SUnit *SU) {
2154  // Update the reservation table.
2155  if (HazardRec->isEnabled()) {
2156  if (!isTop() && SU->isCall) {
2157  // Calls are scheduled with their preceding instructions. For bottom-up
2158  // scheduling, clear the pipeline state before emitting.
2159  HazardRec->Reset();
2160  }
2161  HazardRec->EmitInstruction(SU);
2162  // Scheduling an instruction may have made pending instructions available.
2163  CheckPending = true;
2164  }
2165  // checkHazard should prevent scheduling multiple instructions per cycle that
2166  // exceed the issue width.
2167  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2168  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2169  assert(
2170  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2171  "Cannot schedule this instruction's MicroOps in the current cycle.");
2172 
2173  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2174  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2175 
2176  unsigned NextCycle = CurrCycle;
2177  switch (SchedModel->getMicroOpBufferSize()) {
2178  case 0:
2179  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2180  break;
2181  case 1:
2182  if (ReadyCycle > NextCycle) {
2183  NextCycle = ReadyCycle;
2184  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2185  }
2186  break;
2187  default:
2188  // We don't currently model the OOO reorder buffer, so consider all
2189  // scheduled MOps to be "retired". We do loosely model in-order resource
2190  // latency. If this instruction uses an in-order resource, account for any
2191  // likely stall cycles.
2192  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2193  NextCycle = ReadyCycle;
2194  break;
2195  }
2196  RetiredMOps += IncMOps;
2197 
2198  // Update resource counts and critical resource.
2199  if (SchedModel->hasInstrSchedModel()) {
2200  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2201  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2202  Rem->RemIssueCount -= DecRemIssue;
2203  if (ZoneCritResIdx) {
2204  // Scale scheduled micro-ops for comparing with the critical resource.
2205  unsigned ScaledMOps =
2206  RetiredMOps * SchedModel->getMicroOpFactor();
2207 
2208  // If scaled micro-ops are now more than the previous critical resource by
2209  // a full cycle, then micro-ops issue becomes critical.
2210  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2211  >= (int)SchedModel->getLatencyFactor()) {
2212  ZoneCritResIdx = 0;
2213  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2214  << ScaledMOps / SchedModel->getLatencyFactor()
2215  << "c\n");
2216  }
2217  }
2218  for (TargetSchedModel::ProcResIter
2219  PI = SchedModel->getWriteProcResBegin(SC),
2220  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2221  unsigned RCycle =
2222  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2223  if (RCycle > NextCycle)
2224  NextCycle = RCycle;
2225  }
2226  if (SU->hasReservedResource) {
2227  // For reserved resources, record the highest cycle using the resource.
2228  // For top-down scheduling, this is the cycle in which we schedule this
2229  // instruction plus the number of cycles the operation reserves the
2230  // resource. For bottom-up it is simply the instruction's cycle.
2231  for (TargetSchedModel::ProcResIter
2232  PI = SchedModel->getWriteProcResBegin(SC),
2233  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2234  unsigned PIdx = PI->ProcResourceIdx;
2235  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2236  if (isTop()) {
2237  ReservedCycles[PIdx] =
2238  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2239  }
2240  else
2241  ReservedCycles[PIdx] = NextCycle;
2242  }
2243  }
2244  }
2245  }
2246  // Update ExpectedLatency and DependentLatency.
2247  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2248  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2249  if (SU->getDepth() > TopLatency) {
2250  TopLatency = SU->getDepth();
2251  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2252  << SU->NodeNum << ") " << TopLatency << "c\n");
2253  }
2254  if (SU->getHeight() > BotLatency) {
2255  BotLatency = SU->getHeight();
2256  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2257  << SU->NodeNum << ") " << BotLatency << "c\n");
2258  }
2259  // If we stall for any reason, bump the cycle.
2260  if (NextCycle > CurrCycle)
2261  bumpCycle(NextCycle);
2262  else
2263  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2264  // resource limited. If a stall occurred, bumpCycle does this.
2265  IsResourceLimited =
2266  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2267  getScheduledLatency());
2268 
2269  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2270  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2271  // one cycle. Since we commonly reach the max MOps here, opportunistically
2272  // bump the cycle to avoid uselessly checking everything in the readyQ.
2273  CurrMOps += IncMOps;
2274 
2275  // Bump the cycle count for issue group constraints.
2276  // This must be done after NextCycle has been adjusted for all other stalls.
2277  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2278  // currCycle to X.
2279  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2280  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2281  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2282  << " group\n");
2283  bumpCycle(++NextCycle);
2284  }
2285 
2286  while (CurrMOps >= SchedModel->getIssueWidth()) {
2287  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2288  << CurrCycle << '\n');
2289  bumpCycle(++NextCycle);
2290  }
2291  LLVM_DEBUG(dumpScheduledState());
2292 }
2293 
2294 /// Release pending ready nodes into the available queue. This makes them
2295 /// visible to heuristics.
2296 void SchedBoundary::releasePending() {
2297  // If the available queue is empty, it is safe to reset MinReadyCycle.
2298  if (Available.empty())
2299  MinReadyCycle = std::numeric_limits<unsigned>::max();
2300 
2301  // Check to see if any of the pending instructions are ready to issue. If
2302  // so, add them to the available queue.
2303  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2304  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2305  SUnit *SU = *(Pending.begin()+i);
2306  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2307 
2308  if (ReadyCycle < MinReadyCycle)
2309  MinReadyCycle = ReadyCycle;
2310 
2311  if (!IsBuffered && ReadyCycle > CurrCycle)
2312  continue;
2313 
2314  if (checkHazard(SU))
2315  continue;
2316 
2317  if (Available.size() >= ReadyListLimit)
2318  break;
2319 
2320  Available.push(SU);
2321  Pending.remove(Pending.begin()+i);
2322  --i; --e;
2323  }
2324  CheckPending = false;
2325 }
2326 
2327 /// Remove SU from the ready set for this boundary.
2328 void SchedBoundary::removeReady(SUnit *SU) {
2329  if (Available.isInQueue(SU))
2330  Available.remove(Available.find(SU));
2331  else {
2332  assert(Pending.isInQueue(SU) && "bad ready count");
2333  Pending.remove(Pending.find(SU));
2334  }
2335 }
2336 
2337 /// If this queue only has one ready candidate, return it. As a side effect,
2338 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2339 /// one node is ready. If multiple instructions are ready, return NULL.
2340 SUnit *SchedBoundary::pickOnlyChoice() {
2341  if (CheckPending)
2342  releasePending();
2343 
2344  if (CurrMOps > 0) {
2345  // Defer any ready instrs that now have a hazard.
2346  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2347  if (checkHazard(*I)) {
2348  Pending.push(*I);
2349  I = Available.remove(I);
2350  continue;
2351  }
2352  ++I;
2353  }
2354  }
2355  for (unsigned i = 0; Available.empty(); ++i) {
2356 // FIXME: Re-enable assert once PR20057 is resolved.
2357 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2358 // "permanent hazard");
2359  (void)i;
2360  bumpCycle(CurrCycle + 1);
2361  releasePending();
2362  }
2363 
2364  LLVM_DEBUG(Pending.dump());
2365  LLVM_DEBUG(Available.dump());
2366 
2367  if (Available.size() == 1)
2368  return *Available.begin();
2369  return nullptr;
2370 }
2371 
2372 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2373 // This is useful information to dump after bumpNode.
2374 // Note that the Queue contents are more useful before pickNodeFromQueue.
2375 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2376  unsigned ResFactor;
2377  unsigned ResCount;
2378  if (ZoneCritResIdx) {
2379  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2380  ResCount = getResourceCount(ZoneCritResIdx);
2381  } else {
2382  ResFactor = SchedModel->getMicroOpFactor();
2383  ResCount = RetiredMOps * ResFactor;
2384  }
2385  unsigned LFactor = SchedModel->getLatencyFactor();
2386  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2387  << " Retired: " << RetiredMOps;
2388  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2389  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2390  << ResCount / ResFactor << " "
2391  << SchedModel->getResourceName(ZoneCritResIdx)
2392  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2393  << (IsResourceLimited ? " - Resource" : " - Latency")
2394  << " limited.\n";
2395 }
2396 #endif
2397 
2398 //===----------------------------------------------------------------------===//
2399 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2400 //===----------------------------------------------------------------------===//
2401 
2402 void GenericSchedulerBase::SchedCandidate::
2403 initResourceDelta(const ScheduleDAGMI *DAG,
2404  const TargetSchedModel *SchedModel) {
2405  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2406  return;
2407 
2408  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2409  for (TargetSchedModel::ProcResIter
2410  PI = SchedModel->getWriteProcResBegin(SC),
2411  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2412  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2413  ResDelta.CritResources += PI->Cycles;
2414  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2415  ResDelta.DemandedResources += PI->Cycles;
2416  }
2417 }
2418 
2419 /// Compute remaining latency. We need this both to determine whether the
2420 /// overall schedule has become latency-limited and whether the instructions
2421 /// outside this zone are resource or latency limited.
2422 ///
2423 /// The "dependent" latency is updated incrementally during scheduling as the
2424 /// max height/depth of scheduled nodes minus the cycles since it was
2425 /// scheduled:
2426 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2427 ///
2428 /// The "independent" latency is the max ready queue depth:
2429 /// ILat = max N.depth for N in Available|Pending
2430 ///
2431 /// RemainingLatency is the greater of independent and dependent latency.
2432 ///
2433 /// These computations are expensive, especially in DAGs with many edges, so
2434 /// only do them if necessary.
2435 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2436  unsigned RemLatency = CurrZone.getDependentLatency();
2437  RemLatency = std::max(RemLatency,
2438  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2439  RemLatency = std::max(RemLatency,
2440  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2441  return RemLatency;
2442 }
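// Worked example (hypothetical values): with getDependentLatency() == 8, a
// deepest Available node at unscheduled latency 11 and a deepest Pending
// node at 9, RemLatency == max(8, 11, 9) == 11.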
2443 
2444 /// Returns true if the current cycle plus remaining latency is greater than
2445 /// the critical path in the scheduling region.
2446 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2447  SchedBoundary &CurrZone,
2448  bool ComputeRemLatency,
2449  unsigned &RemLatency) const {
2450  // The current cycle is already greater than the critical path, so we are
2451  // already latency limited and don't need to compute the remaining latency.
2452  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2453  return true;
2454 
2455  // If we haven't scheduled anything yet, then we aren't latency limited.
2456  if (CurrZone.getCurrCycle() == 0)
2457  return false;
2458 
2459  if (ComputeRemLatency)
2460  RemLatency = computeRemLatency(CurrZone);
2461 
2462  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2463 }
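// Illustrative example (made-up numbers): with Rem.CriticalPath == 20,
// CurrZone.getCurrCycle() == 12 and RemLatency == 10, the zone is latency
// limited because 12 + 10 > 20; at CurrCycle == 0 it never is.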
2464 
2465 /// Set the CandPolicy given a scheduling zone given the current resources and
2466 /// latencies inside and outside the zone.
2467 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2468  SchedBoundary &CurrZone,
2469  SchedBoundary *OtherZone) {
2470  // Apply preemptive heuristics based on the total latency and resources
2471  // inside and outside this zone. Potential stalls should be considered before
2472  // following this policy.
2473 
2474  // Compute the critical resource outside the zone.
2475  unsigned OtherCritIdx = 0;
2476  unsigned OtherCount =
2477  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2478 
2479  bool OtherResLimited = false;
2480  unsigned RemLatency = 0;
2481  bool RemLatencyComputed = false;
2482  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2483  RemLatency = computeRemLatency(CurrZone);
2484  RemLatencyComputed = true;
2485  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2486  OtherCount, RemLatency);
2487  }
2488 
2489  // Schedule aggressively for latency in PostRA mode. We don't check for
2490  // acyclic latency during PostRA, and highly out-of-order processors will
2491  // skip PostRA scheduling.
2492  if (!OtherResLimited &&
2493  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2494  RemLatency))) {
2495  Policy.ReduceLatency |= true;
2496  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2497  << " RemainingLatency " << RemLatency << " + "
2498  << CurrZone.getCurrCycle() << "c > CritPath "
2499  << Rem.CriticalPath << "\n");
2500  }
2501  // If the same resource is limiting inside and outside the zone, do nothing.
2502  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2503  return;
2504 
2505  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2506  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2507  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2508  } if (OtherResLimited) dbgs()
2509  << " RemainingLimit: "
2510  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2511  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2512  << " Latency limited both directions.\n");
2513 
2514  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2515  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2516 
2517  if (OtherResLimited)
2518  Policy.DemandResIdx = OtherCritIdx;
2519 }
2520 
2521 #ifndef NDEBUG
2522 const char *GenericSchedulerBase::getReasonStr(
2523  GenericSchedulerBase::CandReason Reason) {
2524  switch (Reason) {
2525  case NoCand: return "NOCAND ";
2526  case Only1: return "ONLY1 ";
2527  case PhysReg: return "PHYS-REG ";
2528  case RegExcess: return "REG-EXCESS";
2529  case RegCritical: return "REG-CRIT ";
2530  case Stall: return "STALL ";
2531  case Cluster: return "CLUSTER ";
2532  case Weak: return "WEAK ";
2533  case RegMax: return "REG-MAX ";
2534  case ResourceReduce: return "RES-REDUCE";
2535  case ResourceDemand: return "RES-DEMAND";
2536  case TopDepthReduce: return "TOP-DEPTH ";
2537  case TopPathReduce: return "TOP-PATH ";
2538  case BotHeightReduce:return "BOT-HEIGHT";
2539  case BotPathReduce: return "BOT-PATH ";
2540  case NextDefUse: return "DEF-USE ";
2541  case NodeOrder: return "ORDER ";
2542  };
2543  llvm_unreachable("Unknown reason!");
2544 }
2545 
2546 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2547  PressureChange P;
2548  unsigned ResIdx = 0;
2549  unsigned Latency = 0;
2550  switch (Cand.Reason) {
2551  default:
2552  break;
2553  case RegExcess:
2554  P = Cand.RPDelta.Excess;
2555  break;
2556  case RegCritical:
2557  P = Cand.RPDelta.CriticalMax;
2558  break;
2559  case RegMax:
2560  P = Cand.RPDelta.CurrentMax;
2561  break;
2562  case ResourceReduce:
2563  ResIdx = Cand.Policy.ReduceResIdx;
2564  break;
2565  case ResourceDemand:
2566  ResIdx = Cand.Policy.DemandResIdx;
2567  break;
2568  case TopDepthReduce:
2569  Latency = Cand.SU->getDepth();
2570  break;
2571  case TopPathReduce:
2572  Latency = Cand.SU->getHeight();
2573  break;
2574  case BotHeightReduce:
2575  Latency = Cand.SU->getHeight();
2576  break;
2577  case BotPathReduce:
2578  Latency = Cand.SU->getDepth();
2579  break;
2580  }
2581  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2582  if (P.isValid())
2583  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2584  << ":" << P.getUnitInc() << " ";
2585  else
2586  dbgs() << " ";
2587  if (ResIdx)
2588  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2589  else
2590  dbgs() << " ";
2591  if (Latency)
2592  dbgs() << " " << Latency << " cycles ";
2593  else
2594  dbgs() << " ";
2595  dbgs() << '\n';
2596 }
2597 #endif
2598 
2599 namespace llvm {
2600 /// Return true if this heuristic determines order.
2601 bool tryLess(int TryVal, int CandVal,
2602  GenericSchedulerBase::SchedCandidate &TryCand,
2603  GenericSchedulerBase::SchedCandidate &Cand,
2604  GenericSchedulerBase::CandReason Reason) {
2605  if (TryVal < CandVal) {
2606  TryCand.Reason = Reason;
2607  return true;
2608  }
2609  if (TryVal > CandVal) {
2610  if (Cand.Reason > Reason)
2611  Cand.Reason = Reason;
2612  return true;
2613  }
2614  return false;
2615 }
2616 
2617 bool tryGreater(int TryVal, int CandVal,
2618  GenericSchedulerBase::SchedCandidate &TryCand,
2619  GenericSchedulerBase::SchedCandidate &Cand,
2620  GenericSchedulerBase::CandReason Reason) {
2621  if (TryVal > CandVal) {
2622  TryCand.Reason = Reason;
2623  return true;
2624  }
2625  if (TryVal < CandVal) {
2626  if (Cand.Reason > Reason)
2627  Cand.Reason = Reason;
2628  return true;
2629  }
2630  return false;
2631 }
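// Illustrative example of the protocol shared by tryLess/tryGreater
// (hypothetical values): tryLess(2, 5, TryCand, Cand, Stall) marks TryCand
// with Stall and returns true; tryLess(5, 2, ...) returns true with Cand
// still winning, recording Stall on Cand if that is a stronger reason;
// equal values return false so the next heuristic breaks the tie.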
2632 
2633 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2634  GenericSchedulerBase::SchedCandidate &Cand,
2635  SchedBoundary &Zone) {
2636  if (Zone.isTop()) {
2637  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2638  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2639  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2640  return true;
2641  }
2642  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2643  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2644  return true;
2645  } else {
2646  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2647  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2648  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2649  return true;
2650  }
2651  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2652  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2653  return true;
2654  }
2655  return false;
2656 }
2657 } // end namespace llvm
2658 
2659 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2660  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2661  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2662 }
2663 
2664 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2665  tracePick(Cand.Reason, Cand.AtTop);
2666 }
2667 
2668 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2669  assert(dag->hasVRegLiveness() &&
2670  "(PreRA)GenericScheduler needs vreg liveness");
2671  DAG = static_cast<ScheduleDAGMILive*>(dag);
2672  SchedModel = DAG->getSchedModel();
2673  TRI = DAG->TRI;
2674 
2675  Rem.init(DAG, SchedModel);
2676  Top.init(DAG, SchedModel, &Rem);
2677  Bot.init(DAG, SchedModel, &Rem);
2678 
2679  // Initialize resource counts.
2680 
2681  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2682  // are disabled, then these HazardRecs will be disabled.
2683  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2684  if (!Top.HazardRec) {
2685  Top.HazardRec =
2686  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2687  Itin, DAG);
2688  }
2689  if (!Bot.HazardRec) {
2690  Bot.HazardRec =
2691  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2692  Itin, DAG);
2693  }
2694  TopCand.SU = nullptr;
2695  BotCand.SU = nullptr;
2696 }
2697 
2698 /// Initialize the per-region scheduling policy.
2699 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2700  MachineBasicBlock::iterator End,
2701  unsigned NumRegionInstrs) {
2702  const MachineFunction &MF = *Begin->getMF();
2703  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2704 
2705  // Avoid setting up the register pressure tracker for small regions to save
2706  // compile time. As a rough heuristic, only track pressure when the number of
2707  // schedulable instructions exceeds half the integer register file.
2708  RegionPolicy.ShouldTrackPressure = true;
2709  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2710  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2711  if (TLI->isTypeLegal(LegalIntVT)) {
2712  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2713  TLI->getRegClassFor(LegalIntVT));
2714  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2715  }
2716  }
2717 
2718  // For generic targets, we default to bottom-up, because it's simpler and more
2719  // compile-time optimizations have been implemented in that direction.
2720  RegionPolicy.OnlyBottomUp = true;
2721 
2722  // Allow the subtarget to override default policy.
2723  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2724 
2725  // After subtarget overrides, apply command line options.
2726  if (!EnableRegPressure)
2727  RegionPolicy.ShouldTrackPressure = false;
2728 
2729  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2730  // e.g. -misched-bottomup=false allows scheduling in both directions.
2732  "-misched-topdown incompatible with -misched-bottomup");
2733  if (ForceBottomUp.getNumOccurrences() > 0) {
2734  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2735  if (RegionPolicy.OnlyBottomUp)
2736  RegionPolicy.OnlyTopDown = false;
2737  }
2738  if (ForceTopDown.getNumOccurrences() > 0) {
2739  RegionPolicy.OnlyTopDown = ForceTopDown;
2740  if (RegionPolicy.OnlyTopDown)
2741  RegionPolicy.OnlyBottomUp = false;
2742  }
2743 }
2744 
2745 void GenericScheduler::dumpPolicy() const {
2746  // Cannot completely remove virtual function even in release mode.
2747 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2748  dbgs() << "GenericScheduler RegionPolicy: "
2749  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2750  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2751  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2752  << "\n";
2753 #endif
2754 }
2755 
2756 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2757 /// critical path by more cycles than it takes to drain the instruction buffer.
2758 /// We estimate an upper bound on in-flight instructions as:
2759 ///
2760 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2761 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2762 /// InFlightResources = InFlightIterations * LoopResources
2763 ///
2764 /// TODO: Check execution resources in addition to IssueCount.
2765 void GenericScheduler::checkAcyclicLatency() {
2766  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2767  return;
2768 
2769  // Scaled number of cycles per loop iteration.
2770  unsigned IterCount =
2771  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2772  Rem.RemIssueCount);
2773  // Scaled acyclic critical path.
2774  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2775  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2776  unsigned InFlightCount =
2777  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2778  unsigned BufferLimit =
2779  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2780 
2781  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2782 
2783  LLVM_DEBUG(
2784  dbgs() << "IssueCycles="
2785  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2786  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2787  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2788  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2789  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2790  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2791 }
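// Worked example of the estimate above (illustrative, assuming
// LatencyFactor == 1 and MicroOpFactor == 1): CyclicCritPath == 10 and
// RemIssueCount == 8 give IterCount == 10; CriticalPath == 30 gives
// AcyclicCount == 30, so InFlightCount == (30 * 8 + 9) / 10 == 24. With a
// MicroOpBufferSize of 16 the limit is 16 < 24, so the loop is flagged as
// acyclic-latency limited.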
2792 
2793 void GenericScheduler::registerRoots() {
2794  Rem.CriticalPath = DAG->ExitSU.getDepth();
2795 
2796  // Some roots may not feed into ExitSU. Check all of them in case.
2797  for (const SUnit *SU : Bot.Available) {
2798  if (SU->getDepth() > Rem.CriticalPath)
2799  Rem.CriticalPath = SU->getDepth();
2800  }
2801  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2802  if (DumpCriticalPathLength) {
2803  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2804  }
2805 
2806  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2807  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2808  checkAcyclicLatency();
2809  }
2810 }
2811 
2812 namespace llvm {
2813 bool tryPressure(const PressureChange &TryP,
2814  const PressureChange &CandP,
2815  GenericSchedulerBase::SchedCandidate &TryCand,
2816  GenericSchedulerBase::SchedCandidate &Cand,
2817  GenericSchedulerBase::CandReason Reason,
2818  const TargetRegisterInfo *TRI,
2819  const MachineFunction &MF) {
2820  // If one candidate decreases and the other increases, go with it.
2821  // Invalid candidates have UnitInc==0.
2822  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2823  Reason)) {
2824  return true;
2825  }
2826  // Do not compare the magnitude of pressure changes between top and bottom
2827  // boundary.
2828  if (Cand.AtTop != TryCand.AtTop)
2829  return false;
2830 
2831  // If both candidates affect the same set in the same boundary, go with the
2832  // smallest increase.
2833  unsigned TryPSet = TryP.getPSetOrMax();
2834  unsigned CandPSet = CandP.getPSetOrMax();
2835  if (TryPSet == CandPSet) {
2836  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2837  Reason);
2838  }
2839 
2840  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2841  std::numeric_limits<int>::max();
2842 
2843  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2844  std::numeric_limits<int>::max();
2845 
2846  // If the candidates are decreasing pressure, reverse priority.
2847  if (TryP.getUnitInc() < 0)
2848  std::swap(TryRank, CandRank);
2849  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2850 }
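// Illustrative example (hypothetical deltas): if TryP has UnitInc == -1 and
// CandP has UnitInc == +2, the decreasing candidate wins immediately; when
// both candidates are in the same boundary and touch the same pressure set,
// the smaller increase wins instead.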
2851 
2852 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2853  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2854 }
2855 
2856 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2857 /// their physreg def/use.
2858 ///
2859 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2860 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2861 /// with the operation that produces or consumes the physreg. We'll do this when
2862 /// regalloc has support for parallel copies.
2863 int biasPhysReg(const SUnit *SU, bool isTop) {
2864  const MachineInstr *MI = SU->getInstr();
2865 
2866  if (MI->isCopy()) {
2867  unsigned ScheduledOper = isTop ? 1 : 0;
2868  unsigned UnscheduledOper = isTop ? 0 : 1;
2869  // If we have already scheduled the physreg producer/consumer, immediately
2870  // schedule the copy.
2871  if (TargetRegisterInfo::isPhysicalRegister(
2872  MI->getOperand(ScheduledOper).getReg()))
2873  return 1;
2874  // If the physreg is at the boundary, defer it. Otherwise schedule it
2875  // immediately to free the dependent. We can hoist the copy later.
2876  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2877  if (TargetRegisterInfo::isPhysicalRegister(
2878  MI->getOperand(UnscheduledOper).getReg()))
2879  return AtBoundary ? -1 : 1;
2880  }
2881 
2882  if (MI->isMoveImmediate()) {
2883  // If we have a move immediate and all successors have been assigned, bias
2884  // towards scheduling this later. Make sure all register defs are to
2885  // physical registers.
2886  bool DoBias = true;
2887  for (const MachineOperand &Op : MI->defs()) {
2888  if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
2889  DoBias = false;
2890  break;
2891  }
2892  }
2893 
2894  if (DoBias)
2895  return isTop ? -1 : 1;
2896  }
2897 
2898  return 0;
2899 }
2900 } // end namespace llvm
2901 
2902 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2903  bool AtTop,
2904  const RegPressureTracker &RPTracker,
2905  RegPressureTracker &TempTracker) {
2906  Cand.SU = SU;
2907  Cand.AtTop = AtTop;
2908  if (DAG->isTrackingPressure()) {
2909  if (AtTop) {
2910  TempTracker.getMaxDownwardPressureDelta(
2911  Cand.SU->getInstr(),
2912  Cand.RPDelta,
2913  DAG->getRegionCriticalPSets(),
2914  DAG->getRegPressure().MaxSetPressure);
2915  } else {
2916  if (VerifyScheduling) {
2917  TempTracker.getMaxUpwardPressureDelta(
2918  Cand.SU->getInstr(),
2919  &DAG->getPressureDiff(Cand.SU),
2920  Cand.RPDelta,
2921  DAG->getRegionCriticalPSets(),
2922  DAG->getRegPressure().MaxSetPressure);
2923  } else {
2924  RPTracker.getUpwardPressureDelta(
2925  Cand.SU->getInstr(),
2926  DAG->getPressureDiff(Cand.SU),
2927  Cand.RPDelta,
2928  DAG->getRegionCriticalPSets(),
2929  DAG->getRegPressure().MaxSetPressure);
2930  }
2931  }
2932  }
2933  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2934  << " Try SU(" << Cand.SU->NodeNum << ") "
2935  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2936  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2937 }
2938 
2939 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2940 /// hierarchical. This may be more efficient than a graduated cost model because
2941 /// we don't need to evaluate all aspects of the model for each node in the
2942 /// queue. But it's really done to make the heuristics easier to debug and
2943 /// statistically analyze.
2944 ///
2945 /// \param Cand provides the policy and current best candidate.
2946 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2947 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2948 // if Cand is from a different zone than TryCand.
2949 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2950  SchedCandidate &TryCand,
2951  SchedBoundary *Zone) const {
2952  // Initialize the candidate if needed.
2953  if (!Cand.isValid()) {
2954  TryCand.Reason = NodeOrder;
2955  return;
2956  }
2957 
2958  // Bias PhysReg defs and copies to their uses and definitions respectively.
2959  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
2960  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
2961  return;
2962 
2963  // Avoid exceeding the target's limit.
2964  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2965  Cand.RPDelta.Excess,
2966  TryCand, Cand, RegExcess, TRI,
2967  DAG->MF))
2968  return;
2969 
2970  // Avoid increasing the max critical pressure in the scheduled region.
2971  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2972  Cand.RPDelta.CriticalMax,
2973  TryCand, Cand, RegCritical, TRI,
2974  DAG->MF))
2975  return;
2976 
2977  // We only compare a subset of features when comparing nodes between
2978  // Top and Bottom boundary. Some properties are simply incomparable, in many
2979  // other instances we should only override the other boundary if something
2980  // is a clear good pick on one boundary. Skip heuristics that are more
2981  // "tie-breaking" in nature.
2982  bool SameBoundary = Zone != nullptr;
2983  if (SameBoundary) {
2984  // For loops that are acyclic path limited, aggressively schedule for
2985  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2986  // heuristics to take precedence.
2987  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2988  tryLatency(TryCand, Cand, *Zone))
2989  return;
2990 
2991  // Prioritize instructions that read unbuffered resources by stall cycles.
2992  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2993  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2994  return;
2995  }
2996 
2997  // Keep clustered nodes together to encourage downstream peephole
2998  // optimizations which may reduce resource requirements.
2999  //
3000  // This is a best effort to set things up for a post-RA pass. Optimizations
3001  // like generating loads of multiple registers should ideally be done within
3002  // the scheduler pass by combining the loads during DAG postprocessing.
3003  const SUnit *CandNextClusterSU =
3004  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3005  const SUnit *TryCandNextClusterSU =
3006  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3007  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3008  Cand.SU == CandNextClusterSU,
3009  TryCand, Cand, Cluster))
3010  return;
3011 
3012  if (SameBoundary) {
3013  // Weak edges are for clustering and other constraints.
3014  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3015  getWeakLeft(Cand.SU, Cand.AtTop),
3016  TryCand, Cand, Weak))
3017  return;
3018  }
3019 
3020  // Avoid increasing the max pressure of the entire region.
3021  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3022  Cand.RPDelta.CurrentMax,
3023  TryCand, Cand, RegMax, TRI,
3024  DAG->MF))
3025  return;
3026 
3027  if (SameBoundary) {
3028  // Avoid critical resource consumption and balance the schedule.
3029  TryCand.initResourceDelta(DAG, SchedModel);
3030  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3031  TryCand, Cand, ResourceReduce))
3032  return;
3033  if (tryGreater(TryCand.ResDelta.DemandedResources,
3034  Cand.ResDelta.DemandedResources,
3035  TryCand, Cand, ResourceDemand))
3036  return;
3037 
3038  // Avoid serializing long latency dependence chains.
3039  // For acyclic path limited loops, latency was already checked above.
3040  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3041  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3042  return;
3043 
3044  // Fall through to original instruction order.
3045  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3046  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3047  TryCand.Reason = NodeOrder;
3048  }
3049  }
3050 }
3051 
3052 /// Pick the best candidate from the queue.
3053 ///
3054 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3055 /// DAG building. To adjust for the current scheduling location we need to
3056 /// maintain the number of vreg uses remaining to be top-scheduled.
3057 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3058  const CandPolicy &ZonePolicy,
3059  const RegPressureTracker &RPTracker,
3060  SchedCandidate &Cand) {
3061  // getMaxPressureDelta temporarily modifies the tracker.
3062  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3063 
3064  ReadyQueue &Q = Zone.Available;
3065  for (SUnit *SU : Q) {
3066 
3067  SchedCandidate TryCand(ZonePolicy);
3068  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3069  // Pass SchedBoundary only when comparing nodes from the same boundary.
3070  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3071  tryCandidate(Cand, TryCand, ZoneArg);
3072  if (TryCand.Reason != NoCand) {
3073  // Initialize resource delta if needed in case future heuristics query it.
3074  if (TryCand.ResDelta == SchedResourceDelta())
3075  TryCand.initResourceDelta(DAG, SchedModel);
3076  Cand.setBest(TryCand);
3077  LLVM_DEBUG(traceCandidate(Cand));
3078  }
3079  }
3080 }
3081 
3082 /// Pick the best candidate node from either the top or bottom queue.
3083 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3084  // Schedule as far as possible in the direction of no choice. This is most
3085  // efficient, but also provides the best heuristics for CriticalPSets.
3086  if (SUnit *SU = Bot.pickOnlyChoice()) {
3087  IsTopNode = false;
3088  tracePick(Only1, false);
3089  return SU;
3090  }
3091  if (SUnit *SU = Top.pickOnlyChoice()) {
3092  IsTopNode = true;
3093  tracePick(Only1, true);
3094  return SU;
3095  }
3096  // Set the bottom-up policy based on the state of the current bottom zone and
3097  // the instructions outside the zone, including the top zone.
3098  CandPolicy BotPolicy;
3099  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3100  // Set the top-down policy based on the state of the current top zone and
3101  // the instructions outside the zone, including the bottom zone.
3102  CandPolicy TopPolicy;
3103  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3104 
3105  // See if BotCand is still valid (because we previously scheduled from Top).
3106  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3107  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3108  BotCand.Policy != BotPolicy) {
3109  BotCand.reset(CandPolicy());
3110  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3111  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3112  } else {
3113  LLVM_DEBUG(traceCandidate(BotCand));
3114 #ifndef NDEBUG
3115  if (VerifyScheduling) {
3116  SchedCandidate TCand;
3117  TCand.reset(CandPolicy());
3118  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3119  assert(TCand.SU == BotCand.SU &&
3120  "Last pick result should correspond to re-picking right now");
3121  }
3122 #endif
3123  }
3124 
3125  // Check if the top Q has a better candidate.
3126  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3127  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3128  TopCand.Policy != TopPolicy) {
3129  TopCand.reset(CandPolicy());
3130  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3131  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3132  } else {
3133  LLVM_DEBUG(traceCandidate(TopCand));
3134 #ifndef NDEBUG
3135  if (VerifyScheduling) {
3136  SchedCandidate TCand;
3137  TCand.reset(CandPolicy());
3138  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3139  assert(TCand.SU == TopCand.SU &&
3140  "Last pick result should correspond to re-picking right now");
3141  }
3142 #endif
3143  }
3144 
3145  // Pick best from BotCand and TopCand.
3146  assert(BotCand.isValid());
3147  assert(TopCand.isValid());
3148  SchedCandidate Cand = BotCand;
3149  TopCand.Reason = NoCand;
3150  tryCandidate(Cand, TopCand, nullptr);
3151  if (TopCand.Reason != NoCand) {
3152  Cand.setBest(TopCand);
3153  LLVM_DEBUG(traceCandidate(Cand));
3154  }
3155 
3156  IsTopNode = Cand.AtTop;
3157  tracePick(Cand);
3158  return Cand.SU;
3159 }
3160 
3161 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3162 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3163  if (DAG->top() == DAG->bottom()) {
3164  assert(Top.Available.empty() && Top.Pending.empty() &&
3165  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3166  return nullptr;
3167  }
3168  SUnit *SU;
3169  do {
3170  if (RegionPolicy.OnlyTopDown) {
3171  SU = Top.pickOnlyChoice();
3172  if (!SU) {
3173  CandPolicy NoPolicy;
3174  TopCand.reset(NoPolicy);
3175  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3176  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3177  tracePick(TopCand);
3178  SU = TopCand.SU;
3179  }
3180  IsTopNode = true;
3181  } else if (RegionPolicy.OnlyBottomUp) {
3182  SU = Bot.pickOnlyChoice();
3183  if (!SU) {
3184  CandPolicy NoPolicy;
3185  BotCand.reset(NoPolicy);
3186  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3187  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3188  tracePick(BotCand);
3189  SU = BotCand.SU;
3190  }
3191  IsTopNode = false;
3192  } else {
3193  SU = pickNodeBidirectional(IsTopNode);
3194  }
3195  } while (SU->isScheduled);
3196 
3197  if (SU->isTopReady())
3198  Top.removeReady(SU);
3199  if (SU->isBottomReady())
3200  Bot.removeReady(SU);
3201 
3202  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3203  << *SU->getInstr());
3204  return SU;
3205 }
3206 
3207 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3208  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3209  if (!isTop)
3210  ++InsertPos;
3211  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3212 
3213  // Find already scheduled copies with a single physreg dependence and move
3214  // them just above the scheduled instruction.
3215  for (SDep &Dep : Deps) {
3216  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3217  continue;
3218  SUnit *DepSU = Dep.getSUnit();
3219  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3220  continue;
3221  MachineInstr *Copy = DepSU->getInstr();
3222  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3223  continue;
3224  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3225  DAG->dumpNode(*Dep.getSUnit()));
3226  DAG->moveInstruction(Copy, InsertPos);
3227  }
3228 }
3229 
3230 /// Update the scheduler's state after scheduling a node. This is the same node
3231 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3232 /// update its state based on the current cycle before MachineSchedStrategy
3233 /// does.
3234 ///
3235 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3236 /// them here. See comments in biasPhysReg.
3237 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3238  if (IsTopNode) {
3239  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3240  Top.bumpNode(SU);
3241  if (SU->hasPhysRegUses)
3242  reschedulePhysReg(SU, true);
3243  } else {
3244  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3245  Bot.bumpNode(SU);
3246  if (SU->hasPhysRegDefs)
3247  reschedulePhysReg(SU, false);
3248  }
3249 }
3250 
3251 /// Create the standard converging machine scheduler. This will be used as the
3252 /// default scheduler if the target does not set a default.
3253 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3254  ScheduleDAGMILive *DAG =
3255  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3256  // Register DAG post-processors.
3257  //
3258  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3259  // data and pass it to later mutations. Have a single mutation that gathers
3260  // the interesting nodes in one pass.
3261  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3262  return DAG;
3263 }
3264 
3265 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3266  return createGenericSchedLive(C);
3267 }
3268 
3269 static MachineSchedRegistry
3270 GenericSchedRegistry("converge", "Standard converging scheduler.",
3271  createConveringSched);
3272 
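// Illustrative sketch (not part of this file): the registration above is the
// pattern a target-specific or out-of-tree strategy would follow to become
// selectable via -misched=<name>. The name "my-sched" and the factory
// createMySched are hypothetical; MachineSchedRegistry and ScheduleDAGMILive
// are the same APIs used elsewhere in this file.
static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  // A real target would substitute its own MachineSchedStrategy subclass.
  return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
}
static MachineSchedRegistry
MySchedRegistry("my-sched", "Hypothetical example scheduler registration.",
                createMySched);
// End of illustrative sketch.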
3273 //===----------------------------------------------------------------------===//
3274 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3275 //===----------------------------------------------------------------------===//
3276 
3277 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3278  DAG = Dag;
3279  SchedModel = DAG->getSchedModel();
3280  TRI = DAG->TRI;
3281 
3282  Rem.init(DAG, SchedModel);
3283  Top.init(DAG, SchedModel, &Rem);
3284  BotRoots.clear();
3285 
3286  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3287  // or are disabled, then these HazardRecs will be disabled.
3288  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3289  if (!Top.HazardRec) {
3290  Top.HazardRec =
3291  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3292  Itin, DAG);
3293  }
3294 }
3295 
3296 void PostGenericScheduler::registerRoots() {
3297  Rem.CriticalPath = DAG->ExitSU.getDepth();
3298 
3299  // Some roots may not feed into ExitSU. Check all of them in case.
3300  for (const SUnit *SU : BotRoots) {
3301  if (SU->getDepth() > Rem.CriticalPath)
3302  Rem.CriticalPath = SU->getDepth();
3303  }
3304  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3305  if (DumpCriticalPathLength) {
3306  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3307  }
3308 }
3309 
3310 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3311 ///
3312 /// \param Cand provides the policy and current best candidate.
3313 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3314 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3315  SchedCandidate &TryCand) {
3316  // Initialize the candidate if needed.
3317  if (!Cand.isValid()) {
3318  TryCand.Reason = NodeOrder;
3319  return;
3320  }
3321 
3322  // Prioritize instructions that read unbuffered resources by stall cycles.
3323  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3324  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3325  return;
3326 
3327  // Keep clustered nodes together.
3328  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3329  Cand.SU == DAG->getNextClusterSucc(),
3330  TryCand, Cand, Cluster))
3331  return;
3332 
3333  // Avoid critical resource consumption and balance the schedule.
3334  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3335  TryCand, Cand, ResourceReduce))
3336  return;
3337  if (tryGreater(TryCand.ResDelta.DemandedResources,
3338  Cand.ResDelta.DemandedResources,
3339  TryCand, Cand, ResourceDemand))
3340  return;
3341 
3342  // Avoid serializing long latency dependence chains.
3343  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3344  return;
3345  }
3346 
3347  // Fall through to original instruction order.
3348  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3349  TryCand.Reason = NodeOrder;
3350 }
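// Illustrative sketch (not part of this file): tryCandidate above relies on
// the tryLess/tryGreater helpers, each of which either decides the ordering
// and records the deciding CandReason, or returns false so the next heuristic
// gets a chance. A simplified, assumed rendering of that pattern (the name
// tryLessSketch is hypothetical):
static bool tryLessSketch(int TryVal, int CandVal,
                          GenericSchedulerBase::SchedCandidate &TryCand,
                          GenericSchedulerBase::SchedCandidate &Cand,
                          GenericSchedulerBase::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;   // TryCand wins on this heuristic.
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;    // Cand wins; keep the strongest reason.
    return true;
  }
  return false;                // Tie: fall through to the next heuristic.
}
// End of illustrative sketch.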
3351 
3352 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3353  ReadyQueue &Q = Top.Available;
3354  for (SUnit *SU : Q) {
3355  SchedCandidate TryCand(Cand.Policy);
3356  TryCand.SU = SU;
3357  TryCand.AtTop = true;
3358  TryCand.initResourceDelta(DAG, SchedModel);
3359  tryCandidate(Cand, TryCand);
3360  if (TryCand.Reason != NoCand) {
3361  Cand.setBest(TryCand);
3362  LLVM_DEBUG(traceCandidate(Cand));
3363  }
3364  }
3365 }
3366 
3367 /// Pick the next node to schedule.
3368 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3369  if (DAG->top() == DAG->bottom()) {
3370  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3371  return nullptr;
3372  }
3373  SUnit *SU;
3374  do {
3375  SU = Top.pickOnlyChoice();
3376  if (SU) {
3377  tracePick(Only1, true);
3378  } else {
3379  CandPolicy NoPolicy;
3380  SchedCandidate TopCand(NoPolicy);
3381  // Set the top-down policy based on the state of the current top zone and
3382  // the instructions outside the zone, including the bottom zone.
3383  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3384  pickNodeFromQueue(TopCand);
3385  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3386  tracePick(TopCand);
3387  SU = TopCand.SU;
3388  }
3389  } while (SU->isScheduled);
3390 
3391  IsTopNode = true;
3392  Top.removeReady(SU);
3393 
3394  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3395  << *SU->getInstr());
3396  return SU;
3397 }
3398 
3399 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3400 /// scheduled/remaining flags in the DAG nodes.
3401 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3402  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3403  Top.bumpNode(SU);
3404 }
3405 
3406 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3407  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3408  /*RemoveKillFlags=*/true);
3409 }
3410 
3411 //===----------------------------------------------------------------------===//
3412 // ILP Scheduler. Currently for experimental analysis of heuristics.
3413 //===----------------------------------------------------------------------===//
3414 
3415 namespace {
3416 
3417 /// Order nodes by the ILP metric.
3418 struct ILPOrder {
3419  const SchedDFSResult *DFSResult = nullptr;
3420  const BitVector *ScheduledTrees = nullptr;
3421  bool MaximizeILP;
3422 
3423  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3424 
3425  /// Apply a less-than relation on node priority.
3426  ///
3427  /// (Return true if A comes after B in the Q.)
3428  bool operator()(const SUnit *A, const SUnit *B) const {
3429  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3430  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3431  if (SchedTreeA != SchedTreeB) {
3432  // Unscheduled trees have lower priority.
3433  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3434  return ScheduledTrees->test(SchedTreeB);
3435 
3436  // Trees with shallower connections have lower priority.
3437  if (DFSResult->getSubtreeLevel(SchedTreeA)
3438  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3439  return DFSResult->getSubtreeLevel(SchedTreeA)
3440  < DFSResult->getSubtreeLevel(SchedTreeB);
3441  }
3442  }
3443  if (MaximizeILP)
3444  return DFSResult->getILP(A) < DFSResult->getILP(B);
3445  else
3446  return DFSResult->getILP(A) > DFSResult->getILP(B);
3447  }
3448 };
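// Illustrative sketch (not part of this file): operator() above is a
// less-than on priority (returning true means A comes after B in the queue),
// so plugging ILPOrder into the standard heap algorithms leaves the
// highest-priority node on top. ILPScheduler below is the real consumer; the
// commented lines here only restate that usage under the assumption that the
// DAG's DFS result has already been computed.
//   ILPOrder Cmp(/*MaxILP=*/true);
//   Cmp.DFSResult = DAG->getDFSResult();
//   Cmp.ScheduledTrees = &DAG->getScheduledTrees();
//   std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
//   std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
//   SUnit *Best = ReadyQ.back(); // highest-priority candidate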
3449 
3450 /// Schedule based on the ILP metric.
3451 class ILPScheduler : public MachineSchedStrategy {
3452  ScheduleDAGMILive *DAG = nullptr;
3453  ILPOrder Cmp;
3454 
3455  std::vector<SUnit*> ReadyQ;
3456 
3457 public:
3458  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3459 
3460  void initialize(ScheduleDAGMI *dag) override {
3461  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3462  DAG = static_cast<ScheduleDAGMILive*>(dag);
3463  DAG->computeDFSResult();
3464  Cmp.DFSResult = DAG->getDFSResult();
3465  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3466  ReadyQ.clear();
3467  }
3468 
3469  void registerRoots() override {
3470  // Restore the heap in ReadyQ with the updated DFS results.
3471  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3472  }
3473 
3474  /// Implement MachineSchedStrategy interface.
3475  /// -----------------------------------------
3476 
3477  /// Callback to select the highest priority node from the ready Q.
3478  SUnit *pickNode(bool &IsTopNode) override {
3479  if (ReadyQ.empty()) return nullptr;
3480  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3481  SUnit *SU = ReadyQ.back();
3482  ReadyQ.pop_back();
3483  IsTopNode = false;
3484  LLVM_DEBUG(dbgs() << "Pick node "
3485  << "SU(" << SU->NodeNum << ") "
3486  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3487  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3488  << " @"
3489  << DAG->getDFSResult()->getSubtreeLevel(
3490  DAG->getDFSResult()->getSubtreeID(SU))
3491  << '\n'
3492  << "Scheduling " << *SU->getInstr());
3493  return SU;
3494  }
3495 
3496  /// Scheduler callback to notify that a new subtree is scheduled.
3497  void scheduleTree(unsigned SubtreeID) override {
3498  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3499  }
3500 
3501  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3502  /// DFSResults, and resort the priority Q.
3503  void schedNode(SUnit *SU, bool IsTopNode) override {
3504  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3505  }
3506 
3507  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3508 
3509  void releaseBottomNode(SUnit *SU) override {
3510  ReadyQ.push_back(SU);
3511  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3512  }
3513 };
3514 
3515 } // end anonymous namespace
3516 
3517 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3518  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3519 }
3520 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3521  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3522 }
3523 
3525  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3527  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3528 
3529 //===----------------------------------------------------------------------===//
3530 // Machine Instruction Shuffler for Correctness Testing
3531 //===----------------------------------------------------------------------===//
3532 
3533 #ifndef NDEBUG
3534 namespace {
3535 
3536 /// Apply a less-than relation on the node order, which corresponds to the
3537 /// instruction order prior to scheduling. IsReverse implements greater-than.
3538 template<bool IsReverse>
3539 struct SUnitOrder {
3540  bool operator()(SUnit *A, SUnit *B) const {
3541  if (IsReverse)
3542  return A->NodeNum > B->NodeNum;
3543  else
3544  return A->NodeNum < B->NodeNum;
3545  }
3546 };
3547 
3548 /// Reorder instructions as much as possible.
3549 class InstructionShuffler : public MachineSchedStrategy {
3550  bool IsAlternating;
3551  bool IsTopDown;
3552 
3553  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3554  // gives nodes with a higher number higher priority causing the latest
3555  // instructions to be scheduled first.
3556  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3557  TopQ;
3558 
3559  // When scheduling bottom-up, use greater-than as the queue priority.
3560  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3561  BottomQ;
3562 
3563 public:
3564  InstructionShuffler(bool alternate, bool topdown)
3565  : IsAlternating(alternate), IsTopDown(topdown) {}
3566 
3567  void initialize(ScheduleDAGMI*) override {
3568  TopQ.clear();
3569  BottomQ.clear();
3570  }
3571 
3572  /// Implement MachineSchedStrategy interface.
3573  /// -----------------------------------------
3574 
3575  SUnit *pickNode(bool &IsTopNode) override {
3576  SUnit *SU;
3577  if (IsTopDown) {
3578  do {
3579  if (TopQ.empty()) return nullptr;
3580  SU = TopQ.top();
3581  TopQ.pop();
3582  } while (SU->isScheduled);
3583  IsTopNode = true;
3584  } else {
3585  do {
3586  if (BottomQ.empty()) return nullptr;
3587  SU = BottomQ.top();
3588  BottomQ.pop();
3589  } while (SU->isScheduled);
3590  IsTopNode = false;
3591  }
3592  if (IsAlternating)
3593  IsTopDown = !IsTopDown;
3594  return SU;
3595  }
3596 
3597  void schedNode(SUnit *SU, bool IsTopNode) override {}
3598 
3599  void releaseTopNode(SUnit *SU) override {
3600  TopQ.push(SU);
3601  }
3602  void releaseBottomNode(SUnit *SU) override {
3603  BottomQ.push(SU);
3604  }
3605 };
3606 
3607 } // end anonymous namespace
3608 
3609 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3610  bool Alternate = !ForceTopDown && !ForceBottomUp;
3611  bool TopDown = !ForceBottomUp;
3612  assert((TopDown || !ForceTopDown) &&
3613  "-misched-topdown incompatible with -misched-bottomup");
3614  return new ScheduleDAGMILive(
3615  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3616 }
3617 
3619  "shuffle", "Shuffle machine instructions alternating directions",
3621 #endif // !NDEBUG
3622 
3623 //===----------------------------------------------------------------------===//
3624 // GraphWriter support for ScheduleDAGMILive.
3625 //===----------------------------------------------------------------------===//
3626 
3627 #ifndef NDEBUG
3628 namespace llvm {
3629 
3630 template<> struct GraphTraits<
3631  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3632 
3633 template<>
3634 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3635  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3636 
3637  static std::string getGraphName(const ScheduleDAG *G) {
3638  return G->MF.getName();
3639  }
3640 
3641  static bool renderGraphFromBottomUp() {
3642  return true;
3643  }
3644 
3645  static bool isNodeHidden(const SUnit *Node) {
3646  if (ViewMISchedCutoff == 0)
3647  return false;
3648  return (Node->Preds.size() > ViewMISchedCutoff
3649  || Node->Succs.size() > ViewMISchedCutoff);
3650  }
3651 
3652  /// If you want to override the dot attributes printed for a particular
3653  /// edge, override this method.
3654  static std::string getEdgeAttributes(const SUnit *Node,
3655  SUnitIterator EI,
3656  const ScheduleDAG *Graph) {
3657  if (EI.isArtificialDep())
3658  return "color=cyan,style=dashed";
3659  if (EI.isCtrlDep())
3660  return "color=blue,style=dashed";
3661  return "";
3662  }
3663 
3664  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3665  std::string Str;
3666  raw_string_ostream SS(Str);
3667  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3668  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3669  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3670  SS << "SU:" << SU->NodeNum;
3671  if (DFS)
3672  SS << " I:" << DFS->getNumInstrs(SU);
3673  return SS.str();
3674  }
3675 
3676  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3677  return G->getGraphNodeLabel(SU);
3678  }
3679 
3680  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3681  std::string Str("shape=Mrecord");
3682  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3683  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3684  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3685  if (DFS) {
3686  Str += ",style=filled,fillcolor=\"#";
3687  Str += DOT::getColorString(DFS->getSubtreeID(N));
3688  Str += '"';
3689  }
3690  return Str;
3691  }
3692 };
3693 
3694 } // end namespace llvm
3695 #endif // NDEBUG
3696 
3697 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3698 /// rendered using 'dot'.
3699 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3700 #ifndef NDEBUG
3701  ViewGraph(this, Name, false, Title);
3702 #else
3703  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3704  << "systems with Graphviz or gv!\n";
3705 #endif // NDEBUG
3706 }
3707 
3708 /// Out-of-line implementation with no arguments is handy for gdb.
3709 void ScheduleDAGMI::viewGraph() {
3710  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3711 }