MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
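// Note: these are ordinary cl::opt flags, so they can be passed directly to
// llc (e.g. -misched-topdown) or reach clang through -mllvm.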
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph; provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
140 MachineSchedContext::MachineSchedContext() {
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
144 MachineSchedContext::~MachineSchedContext() {
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
197 char &llvm::MachineSchedulerID = MachineScheduler::ID;
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205  "Machine Instruction Scheduler", false, false)
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
208  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
213  AU.addRequired<MachineDominatorTree>();
214  AU.addRequired<MachineLoopInfo>();
215  AU.addRequired<AAResultsWrapperPass>();
216  AU.addRequired<TargetPassConfig>();
217  AU.addRequired<SlotIndexes>();
218  AU.addPreserved<SlotIndexes>();
219  AU.addRequired<LiveIntervals>();
220  AU.addPreserved<LiveIntervals>();
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
227 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
232  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
237  AU.addRequiredID(MachineDominatorsID);
238  AU.addRequired<MachineLoopInfo>();
239  AU.addRequired<TargetPassConfig>();
240  MachineFunctionPass::getAnalysisUsage(AU);
241 }
242 
244 MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
245  MachineSchedRegistry::Registry;
246 /// A dummy default scheduler factory indicates whether the scheduler
247 /// is overridden on the command line.
248 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
249  return nullptr;
250 }
251 
252 /// MachineSchedOpt allows command line selection of the scheduler.
253 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
254  RegisterPassParser<MachineSchedRegistry>>
255 MachineSchedOpt("misched",
256  cl::init(&useDefaultMachineSched), cl::Hidden,
257  cl::desc("Machine instruction scheduler to use"));
258 
259 static MachineSchedRegistry
260 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
261  useDefaultMachineSched);
262 
263 static cl::opt<bool> EnableMachineSched(
264  "enable-misched",
265  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
266  cl::Hidden);
267 
268 static cl::opt<bool> EnablePostRAMachineSched(
269  "enable-post-misched",
270  cl::desc("Enable the post-ra machine instruction scheduling pass."),
271  cl::init(true), cl::Hidden);
272 
273 /// Decrement this iterator until reaching the top or a non-debug instr.
274 static MachineBasicBlock::const_iterator
275 priorNonDebug(MachineBasicBlock::const_iterator I,
276  MachineBasicBlock::const_iterator Beg) {
277  assert(I != Beg && "reached the top of the region, cannot decrement");
278  while (--I != Beg) {
279  if (!I->isDebugInstr())
280  break;
281  }
282  return I;
283 }
284 
285 /// Non-const version.
286 static MachineBasicBlock::iterator
287 priorNonDebug(MachineBasicBlock::iterator I,
288  MachineBasicBlock::iterator Beg) {
289  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
290  .getNonConstIterator();
291 }
292 
293 /// If this iterator is a debug value, increment until reaching the End or a
294 /// non-debug instruction.
295 static MachineBasicBlock::const_iterator
296 nextIfDebug(MachineBasicBlock::const_iterator I,
297  MachineBasicBlock::const_iterator End) {
298  for(; I != End; ++I) {
299  if (!I->isDebugInstr())
300  break;
301  }
302  return I;
303 }
304 
305 /// Non-const version.
306 static MachineBasicBlock::iterator
307 nextIfDebug(MachineBasicBlock::iterator I,
308  MachineBasicBlock::iterator End) {
309  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
310  .getNonConstIterator();
311 }
312 
313 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
314 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
315  // Select the scheduler, or set the default.
316  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
317  if (Ctor != useDefaultMachineSched)
318  return Ctor(this);
319 
320  // Get the default scheduler set by the target for this function.
321  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
322  if (Scheduler)
323  return Scheduler;
324 
325  // Default to GenericScheduler.
326  return createGenericSchedLive(this);
327 }
328 
329 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
330 /// the caller. We don't have a command line option to override the postRA
331 /// scheduler. The Target must configure it.
332 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
333  // Get the postRA scheduler set by the target for this function.
334  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
335  if (Scheduler)
336  return Scheduler;
337 
338  // Default to GenericScheduler.
339  return createGenericSchedPostRA(this);
340 }
341 
342 /// Top-level MachineScheduler pass driver.
343 ///
344 /// Visit blocks in function order. Divide each block into scheduling regions
345 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
346 /// consistent with the DAG builder, which traverses the interior of the
347 /// scheduling regions bottom-up.
348 ///
349 /// This design avoids exposing scheduling boundaries to the DAG builder,
350 /// simplifying the DAG builder's support for "special" target instructions.
351 /// At the same time the design allows target schedulers to operate across
352 /// scheduling boundaries, for example to bundle the boundary instructions
353 /// without reordering them. This creates complexity, because the target
354 /// scheduler must update the RegionBegin and RegionEnd positions cached by
355 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
356 /// design would be to split blocks at scheduling boundaries, but LLVM has a
357 /// general bias against block splitting purely for implementation simplicity.
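///
/// For example, because calls are scheduling boundaries (see isSchedBoundary),
/// a block of the form "a; b; call; c; d" is split into two regions, [a, b]
/// and [c, d], and the call itself is left in place between them.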
358 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
359  if (skipFunction(mf.getFunction()))
360  return false;
361 
362  if (EnableMachineSched.getNumOccurrences()) {
363  if (!EnableMachineSched)
364  return false;
365  } else if (!mf.getSubtarget().enableMachineScheduler())
366  return false;
367 
368  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
369 
370  // Initialize the context of the pass.
371  MF = &mf;
372  MLI = &getAnalysis<MachineLoopInfo>();
373  MDT = &getAnalysis<MachineDominatorTree>();
374  PassConfig = &getAnalysis<TargetPassConfig>();
375  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
376 
377  LIS = &getAnalysis<LiveIntervals>();
378 
379  if (VerifyScheduling) {
380  LLVM_DEBUG(LIS->dump());
381  MF->verify(this, "Before machine scheduling.");
382  }
383  RegClassInfo->runOnMachineFunction(*MF);
384 
385  // Instantiate the selected scheduler for this target, function, and
386  // optimization level.
387  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
388  scheduleRegions(*Scheduler, false);
389 
390  LLVM_DEBUG(LIS->dump());
391  if (VerifyScheduling)
392  MF->verify(this, "After machine scheduling.");
393  return true;
394 }
395 
396 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
397  if (skipFunction(mf.getFunction()))
398  return false;
399 
400  if (EnablePostRAMachineSched.getNumOccurrences()) {
401  if (!EnablePostRAMachineSched)
402  return false;
403  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
404  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
405  return false;
406  }
407  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
408 
409  // Initialize the context of the pass.
410  MF = &mf;
411  MLI = &getAnalysis<MachineLoopInfo>();
412  PassConfig = &getAnalysis<TargetPassConfig>();
413 
414  if (VerifyScheduling)
415  MF->verify(this, "Before post machine scheduling.");
416 
417  // Instantiate the selected scheduler for this target, function, and
418  // optimization level.
419  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
420  scheduleRegions(*Scheduler, true);
421 
422  if (VerifyScheduling)
423  MF->verify(this, "After post machine scheduling.");
424  return true;
425 }
426 
427 /// Return true if the given instruction should not be included in a scheduling
428 /// region.
429 ///
430 /// MachineScheduler does not currently support scheduling across calls. To
431 /// handle calls, the DAG builder needs to be modified to create register
432 /// anti/output dependencies on the registers clobbered by the call's regmask
433 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
434 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
435 /// the boundary, but there would be no benefit to postRA scheduling across
436 /// calls this late anyway.
437 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
438  MachineBasicBlock *MBB,
439  MachineFunction *MF,
440  const TargetInstrInfo *TII) {
441  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
442 }
443 
444 /// A region of an MBB for scheduling.
445 namespace {
446 struct SchedRegion {
447  /// RegionBegin is the first instruction in the scheduling region, and
448  /// RegionEnd is either MBB->end() or the scheduling boundary after the
449  /// last instruction in the scheduling region. These iterators cannot refer
450  /// to instructions outside of the identified scheduling region because
451  /// those may be reordered before scheduling this region.
452  MachineBasicBlock::iterator RegionBegin;
453  MachineBasicBlock::iterator RegionEnd;
454  unsigned NumRegionInstrs;
455 
456  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
457  unsigned N) :
458  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
459 };
460 } // end anonymous namespace
461 
462 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
463 
464 static void
465 getSchedRegions(MachineBasicBlock *MBB,
466  MBBRegionsVector &Regions,
467  bool RegionsTopDown) {
468  MachineFunction *MF = MBB->getParent();
469  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
470 
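  // Walk the block bottom-up: each iteration peels off one region that ends at
  // RegionEnd and extends upward to the nearest scheduling boundary.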
471  MachineBasicBlock::iterator I = nullptr;
472  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
473  RegionEnd != MBB->begin(); RegionEnd = I) {
474 
475  // Avoid decrementing RegionEnd for blocks with no terminator.
476  if (RegionEnd != MBB->end() ||
477  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
478  --RegionEnd;
479  }
480 
481  // The next region starts above the previous region. Look backward in the
482  // instruction stream until we find the nearest boundary.
483  unsigned NumRegionInstrs = 0;
484  I = RegionEnd;
485  for (;I != MBB->begin(); --I) {
486  MachineInstr &MI = *std::prev(I);
487  if (isSchedBoundary(&MI, &*MBB, MF, TII))
488  break;
489  if (!MI.isDebugInstr()) {
490  // MBB::size() uses instr_iterator to count. Here we need a bundle to
491  // count as a single instruction.
492  ++NumRegionInstrs;
493  }
494  }
495 
496  // It's possible we found a scheduling region that only has debug
497  // instructions. Don't bother scheduling these.
498  if (NumRegionInstrs != 0)
499  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
500  }
501 
502  if (RegionsTopDown)
503  std::reverse(Regions.begin(), Regions.end());
504 }
505 
506 /// Main driver for both MachineScheduler and PostMachineScheduler.
507 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
508  bool FixKillFlags) {
509  // Visit all machine basic blocks.
510  //
511  // TODO: Visit blocks in global postorder or postorder within the bottom-up
512  // loop tree. Then we can optionally compute global RegPressure.
513  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
514  MBB != MBBEnd; ++MBB) {
515 
516  Scheduler.startBlock(&*MBB);
517 
518 #ifndef NDEBUG
519  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
520  continue;
521  if (SchedOnlyBlock.getNumOccurrences()
522  && (int)SchedOnlyBlock != MBB->getNumber())
523  continue;
524 #endif
525 
526  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
527  // points to the scheduling boundary at the bottom of the region. The DAG
528  // does not include RegionEnd, but the region does (i.e. the next
529  // RegionEnd is above the previous RegionBegin). If the current block has
530  // no terminator then RegionEnd == MBB->end() for the bottom region.
531  //
532  // All the regions of MBB are first found and stored in MBBRegions, which
533  // are then processed top-down in MBB order if RegionsTopDown is true.
534  //
535  // The Scheduler may insert instructions during either schedule() or
536  // exitRegion(), even for empty regions. So the local iterators 'I' and
537  // 'RegionEnd' are invalid across these calls. Instructions must not be
538  // added to other regions than the current one without updating MBBRegions.
539 
540  MBBRegionsVector MBBRegions;
541  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
542  for (MBBRegionsVector::iterator R = MBBRegions.begin();
543  R != MBBRegions.end(); ++R) {
544  MachineBasicBlock::iterator I = R->RegionBegin;
545  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
546  unsigned NumRegionInstrs = R->NumRegionInstrs;
547 
548  // Notify the scheduler of the region, even if we may skip scheduling
549  // it. Perhaps it still needs to be bundled.
550  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
551 
552  // Skip empty scheduling regions (0 or 1 schedulable instructions).
553  if (I == RegionEnd || I == std::prev(RegionEnd)) {
554  // Close the current region. Bundle the terminator if needed.
555  // This invalidates 'RegionEnd' and 'I'.
556  Scheduler.exitRegion();
557  continue;
558  }
559  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
560  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
561  << " " << MBB->getName() << "\n From: " << *I
562  << " To: ";
563  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
564  else dbgs() << "End";
565  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
566  if (DumpCriticalPathLength) {
567  errs() << MF->getName();
568  errs() << ":%bb. " << MBB->getNumber();
569  errs() << " " << MBB->getName() << " \n";
570  }
571 
572  // Schedule a region: possibly reorder instructions.
573  // This invalidates the original region iterators.
574  Scheduler.schedule();
575 
576  // Close the current region.
577  Scheduler.exitRegion();
578  }
579  Scheduler.finishBlock();
580  // FIXME: Ideally, no further passes should rely on kill flags. However,
581  // thumb2 size reduction is currently an exception, so the PostMIScheduler
582  // needs to do this.
583  if (FixKillFlags)
584  Scheduler.fixupKills(*MBB);
585  }
586  Scheduler.finalizeSchedule();
587 }
588 
589 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
590  // unimplemented
591 }
592 
593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
595  dbgs() << "Queue " << Name << ": ";
596  for (const SUnit *SU : Queue)
597  dbgs() << SU->NodeNum << " ";
598  dbgs() << "\n";
599 }
600 #endif
601 
602 //===----------------------------------------------------------------------===//
603 // ScheduleDAGMI - Basic machine instruction scheduling. This is
604 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
605 // virtual registers.
606 // ===----------------------------------------------------------------------===/
607 
608 // Provide a vtable anchor.
609 ScheduleDAGMI::~ScheduleDAGMI() = default;
610 
611 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
612 /// NumPredsLeft reaches zero, release the successor node.
613 ///
614 /// FIXME: Adjust SuccSU height based on MinLatency.
615 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
616  SUnit *SuccSU = SuccEdge->getSUnit();
617 
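  // A weak edge (e.g. a cluster or weak artificial edge) is not counted in
  // NumPredsLeft, so it never gates readiness; just remember the preferred
  // cluster successor for the strategy's next pick.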
618  if (SuccEdge->isWeak()) {
619  --SuccSU->WeakPredsLeft;
620  if (SuccEdge->isCluster())
621  NextClusterSucc = SuccSU;
622  return;
623  }
624 #ifndef NDEBUG
625  if (SuccSU->NumPredsLeft == 0) {
626  dbgs() << "*** Scheduling failed! ***\n";
627  dumpNode(*SuccSU);
628  dbgs() << " has been released too many times!\n";
629  llvm_unreachable(nullptr);
630  }
631 #endif
632  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
633  // CurrCycle may have advanced since then.
634  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
635  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
636 
637  --SuccSU->NumPredsLeft;
638  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
639  SchedImpl->releaseTopNode(SuccSU);
640 }
641 
642 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
643 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
644  for (SDep &Succ : SU->Succs)
645  releaseSucc(SU, &Succ);
646 }
647 
648 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
649 /// NumSuccsLeft reaches zero, release the predecessor node.
650 ///
651 /// FIXME: Adjust PredSU height based on MinLatency.
652 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
653  SUnit *PredSU = PredEdge->getSUnit();
654 
655  if (PredEdge->isWeak()) {
656  --PredSU->WeakSuccsLeft;
657  if (PredEdge->isCluster())
658  NextClusterPred = PredSU;
659  return;
660  }
661 #ifndef NDEBUG
662  if (PredSU->NumSuccsLeft == 0) {
663  dbgs() << "*** Scheduling failed! ***\n";
664  dumpNode(*PredSU);
665  dbgs() << " has been released too many times!\n";
666  llvm_unreachable(nullptr);
667  }
668 #endif
669  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
670  // CurrCycle may have advanced since then.
671  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
672  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
673 
674  --PredSU->NumSuccsLeft;
675  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
676  SchedImpl->releaseBottomNode(PredSU);
677 }
678 
679 /// releasePredecessors - Call releasePred on each of SU's predecessors.
680 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
681  for (SDep &Pred : SU->Preds)
682  releasePred(SU, &Pred);
683 }
684 
685 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
686  ScheduleDAGInstrs::startBlock(bb);
687  SchedImpl->enterMBB(bb);
688 }
689 
690 void ScheduleDAGMI::finishBlock() {
691  SchedImpl->leaveMBB();
692  ScheduleDAGInstrs::finishBlock();
693 }
694 
695 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
696 /// crossing a scheduling boundary. [begin, end) includes all instructions in
697 /// the region, including the boundary itself and single-instruction regions
698 /// that don't get scheduled.
699 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
700  MachineBasicBlock::iterator begin,
701  MachineBasicBlock::iterator end,
702  unsigned regioninstrs)
703 {
704  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
705 
706  SchedImpl->initPolicy(begin, end, regioninstrs);
707 }
708 
709 /// This is normally called from the main scheduler loop but may also be invoked
710 /// by the scheduling strategy to perform additional code motion.
711 void ScheduleDAGMI::moveInstruction(
712  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
713  // Advance RegionBegin if the first instruction moves down.
714  if (&*RegionBegin == MI)
715  ++RegionBegin;
716 
717  // Update the instruction stream.
718  BB->splice(InsertPos, BB, MI);
719 
720  // Update LiveIntervals
721  if (LIS)
722  LIS->handleMove(*MI, /*UpdateFlags=*/true);
723 
724  // Recede RegionBegin if an instruction moves above the first.
725  if (RegionBegin == InsertPos)
726  RegionBegin = MI;
727 }
728 
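// -misched-cutoff=N stops the scheduler from reordering once N instructions
// have been scheduled, leaving the rest in their original order. This is
// handy for bisecting scheduler bugs.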
729 bool ScheduleDAGMI::checkSchedLimit() {
730 #ifndef NDEBUG
731  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
732  CurrentTop = CurrentBottom;
733  return false;
734  }
735  ++NumInstrsScheduled;
736 #endif
737  return true;
738 }
739 
740 /// Per-region scheduling driver, called back from
741 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
742 /// does not consider liveness or register pressure. It is useful for PostRA
743 /// scheduling and potentially other custom schedulers.
744 void ScheduleDAGMI::schedule() {
745  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
746  LLVM_DEBUG(SchedImpl->dumpPolicy());
747 
748  // Build the DAG.
749  buildSchedGraph(AA);
750 
751  postprocessDAG();
752 
753  SmallVector<SUnit*, 8> TopRoots, BotRoots;
754  findRootsAndBiasEdges(TopRoots, BotRoots);
755 
756  LLVM_DEBUG(dump());
757  if (PrintDAGs) dump();
758  if (ViewMISchedDAGs) viewGraph();
759 
760  // Initialize the strategy before modifying the DAG.
761  // This may initialize a DFSResult to be used for queue priority.
762  SchedImpl->initialize(this);
763 
764  // Initialize ready queues now that the DAG and priority data are finalized.
765  initQueues(TopRoots, BotRoots);
766 
767  bool IsTopNode = false;
768  while (true) {
769  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
770  SUnit *SU = SchedImpl->pickNode(IsTopNode);
771  if (!SU) break;
772 
773  assert(!SU->isScheduled && "Node already scheduled");
774  if (!checkSchedLimit())
775  break;
776 
777  MachineInstr *MI = SU->getInstr();
778  if (IsTopNode) {
779  assert(SU->isTopReady() && "node still has unscheduled dependencies");
780  if (&*CurrentTop == MI)
781  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
782  else
783  moveInstruction(MI, CurrentTop);
784  } else {
785  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
786  MachineBasicBlock::iterator priorII =
787  priorNonDebug(CurrentBottom, CurrentTop);
788  if (&*priorII == MI)
789  CurrentBottom = priorII;
790  else {
791  if (&*CurrentTop == MI)
792  CurrentTop = nextIfDebug(++CurrentTop, priorII);
793  moveInstruction(MI, CurrentBottom);
794  CurrentBottom = MI;
795  }
796  }
797  // Notify the scheduling strategy before updating the DAG.
798  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
799  // runs, it can then use the accurate ReadyCycle time to determine whether
800  // newly released nodes can move to the readyQ.
801  SchedImpl->schedNode(SU, IsTopNode);
802 
803  updateQueues(SU, IsTopNode);
804  }
805  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
806 
807  placeDebugValues();
808 
809  LLVM_DEBUG({
810  dbgs() << "*** Final schedule for "
811  << printMBBReference(*begin()->getParent()) << " ***\n";
812  dumpSchedule();
813  dbgs() << '\n';
814  });
815 }
816 
817 /// Apply each ScheduleDAGMutation step in order.
818 void ScheduleDAGMI::postprocessDAG() {
819  for (auto &m : Mutations)
820  m->apply(this);
821 }
822 
823 void ScheduleDAGMI::
824 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
825  SmallVectorImpl<SUnit*> &BotRoots) {
826  for (SUnit &SU : SUnits) {
827  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
828 
829  // Order predecessors so DFSResult follows the critical path.
830  SU.biasCriticalPath();
831 
832  // A SUnit is ready to top schedule if it has no predecessors.
833  if (!SU.NumPredsLeft)
834  TopRoots.push_back(&SU);
835  // A SUnit is ready to bottom schedule if it has no successors.
836  if (!SU.NumSuccsLeft)
837  BotRoots.push_back(&SU);
838  }
839  ExitSU.biasCriticalPath();
840 }
841 
842 /// Identify DAG roots and setup scheduler queues.
843 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
844  ArrayRef<SUnit*> BotRoots) {
845  NextClusterSucc = nullptr;
846  NextClusterPred = nullptr;
847 
848  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
849  //
850  // Nodes with unreleased weak edges can still be roots.
851  // Release top roots in forward order.
852  for (SUnit *SU : TopRoots)
853  SchedImpl->releaseTopNode(SU);
854 
855  // Release bottom roots in reverse order so the higher priority nodes appear
856  // first. This is more natural and slightly more efficient.
857  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
858  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
859  SchedImpl->releaseBottomNode(*I);
860  }
861 
862  releaseSuccessors(&EntrySU);
863  releasePredecessors(&ExitSU);
864 
865  SchedImpl->registerRoots();
866 
867  // Advance past initial DebugValues.
868  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
869  CurrentBottom = RegionEnd;
870 }
871 
872 /// Update scheduler queues after scheduling an instruction.
873 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
874  // Release dependent instructions for scheduling.
875  if (IsTopNode)
876  releaseSuccessors(SU);
877  else
878  releasePredecessors(SU);
879 
880  SU->isScheduled = true;
881 }
882 
883 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
884 void ScheduleDAGMI::placeDebugValues() {
885  // If first instruction was a DBG_VALUE then put it back.
886  if (FirstDbgValue) {
887  BB->splice(RegionBegin, BB, FirstDbgValue);
888  RegionBegin = FirstDbgValue;
889  }
890 
891  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
892  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
893  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
894  MachineInstr *DbgValue = P.first;
895  MachineBasicBlock::iterator OrigPrevMI = P.second;
896  if (&*RegionBegin == DbgValue)
897  ++RegionBegin;
898  BB->splice(++OrigPrevMI, BB, DbgValue);
899  if (OrigPrevMI == std::prev(RegionEnd))
900  RegionEnd = DbgValue;
901  }
902  DbgValues.clear();
903  FirstDbgValue = nullptr;
904 }
905 
906 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
907 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
908  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
909  if (SUnit *SU = getSUnit(&(*MI)))
910  dumpNode(*SU);
911  else
912  dbgs() << "Missing SUnit\n";
913  }
914 }
915 #endif
916 
917 //===----------------------------------------------------------------------===//
918 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
919 // preservation.
920 //===----------------------------------------------------------------------===//
921 
922 ScheduleDAGMILive::~ScheduleDAGMILive() {
923  delete DFSResult;
924 }
925 
926 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
927  const MachineInstr &MI = *SU.getInstr();
928  for (const MachineOperand &MO : MI.operands()) {
929  if (!MO.isReg())
930  continue;
931  if (!MO.readsReg())
932  continue;
933  if (TrackLaneMasks && !MO.isUse())
934  continue;
935 
936  unsigned Reg = MO.getReg();
937  if (!TargetRegisterInfo::isVirtualRegister(Reg))
938  continue;
939 
940  // Ignore re-defs.
941  if (TrackLaneMasks) {
942  bool FoundDef = false;
943  for (const MachineOperand &MO2 : MI.operands()) {
944  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
945  FoundDef = true;
946  break;
947  }
948  }
949  if (FoundDef)
950  continue;
951  }
952 
953  // Record this local VReg use.
954  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
955  for (; UI != VRegUses.end(); ++UI) {
956  if (UI->SU == &SU)
957  break;
958  }
959  if (UI == VRegUses.end())
960  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
961  }
962 }
963 
964 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
965 /// crossing a scheduling boundary. [begin, end) includes all instructions in
966 /// the region, including the boundary itself and single-instruction regions
967 /// that don't get scheduled.
968 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
969  MachineBasicBlock::iterator begin,
970  MachineBasicBlock::iterator end,
971  unsigned regioninstrs)
972 {
973  // ScheduleDAGMI initializes SchedImpl's per-region policy.
974  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
975 
976  // For convenience remember the end of the liveness region.
977  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
978 
979  SUPressureDiffs.clear();
980 
981  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
982  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
983 
984  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
985  "ShouldTrackLaneMasks requires ShouldTrackPressure");
986 }
987 
988 // Setup the register pressure trackers for the top-scheduled and
989 // bottom-scheduled regions.
990 void ScheduleDAGMILive::initRegPressure() {
991  VRegUses.clear();
992  VRegUses.setUniverse(MRI.getNumVirtRegs());
993  for (SUnit &SU : SUnits)
994  collectVRegUses(SU);
995 
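  // Two trackers are maintained: TopRPTracker advances downward from
  // RegionBegin as instructions are scheduled from the top, while BotRPTracker
  // recedes upward from LiveRegionEnd for the bottom-up zone.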
996  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
997  ShouldTrackLaneMasks, false);
998  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
999  ShouldTrackLaneMasks, false);
1000 
1001  // Close the RPTracker to finalize live ins.
1002  RPTracker.closeRegion();
1003 
1004  LLVM_DEBUG(RPTracker.dump());
1005 
1006  // Initialize the live ins and live outs.
1007  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1008  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1009 
1010  // Close one end of the tracker so we can call
1011  // getMaxUpward/DownwardPressureDelta before advancing across any
1012  // instructions. This converts currently live regs into live ins/outs.
1013  TopRPTracker.closeTop();
1014  BotRPTracker.closeBottom();
1015 
1016  BotRPTracker.initLiveThru(RPTracker);
1017  if (!BotRPTracker.getLiveThru().empty()) {
1018  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1019  LLVM_DEBUG(dbgs() << "Live Thru: ";
1020  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1021  };
1022 
1023  // For each live out vreg reduce the pressure change associated with other
1024  // uses of the same vreg below the live-out reaching def.
1025  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1026 
1027  // Account for liveness generated by the region boundary.
1028  if (LiveRegionEnd != RegionEnd) {
1029  SmallVector<RegisterMaskPair, 8> LiveUses;
1030  BotRPTracker.recede(&LiveUses);
1031  updatePressureDiffs(LiveUses);
1032  }
1033 
1034  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1035  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1036  dbgs() << "Bottom Pressure:\n";
1037  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1038 
1039  assert((BotRPTracker.getPos() == RegionEnd ||
1040  (RegionEnd->isDebugInstr() &&
1041  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1042  "Can't find the region bottom");
1043 
1044  // Cache the list of excess pressure sets in this region. This will also track
1045  // the max pressure in the scheduled code for these sets.
1046  RegionCriticalPSets.clear();
1047  const std::vector<unsigned> &RegionPressure =
1048  RPTracker.getPressure().MaxSetPressure;
1049  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1050  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1051  if (RegionPressure[i] > Limit) {
1052  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1053  << " Actual " << RegionPressure[i] << "\n");
1054  RegionCriticalPSets.push_back(PressureChange(i));
1055  }
1056  }
1057  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1058  for (const PressureChange &RCPS
1059  : RegionCriticalPSets) dbgs()
1060  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1061  dbgs() << "\n");
1062 }
1063 
1065 void ScheduleDAGMILive::updateScheduledPressure(const SUnit *SU,
1066  const std::vector<unsigned> &NewMaxPressure) {
1067  const PressureDiff &PDiff = getPressureDiff(SU);
1068  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1069  for (const PressureChange &PC : PDiff) {
1070  if (!PC.isValid())
1071  break;
1072  unsigned ID = PC.getPSet();
1073  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1074  ++CritIdx;
1075  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1076  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1077  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1078  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1079  }
1080  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1081  if (NewMaxPressure[ID] >= Limit - 2) {
1082  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1083  << NewMaxPressure[ID]
1084  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1085  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1086  << " livethru)\n");
1087  }
1088  }
1089 }
1090 
1091 /// Update the PressureDiff array for liveness after scheduling this
1092 /// instruction.
1093 void ScheduleDAGMILive::updatePressureDiffs(
1094  ArrayRef<RegisterMaskPair> LiveUses) {
1095  for (const RegisterMaskPair &P : LiveUses) {
1096  unsigned Reg = P.RegUnit;
1097  /// FIXME: Currently assuming single-use physregs.
1098  if (!TRI->isVirtualRegister(Reg))
1099  continue;
1100 
1101  if (ShouldTrackLaneMasks) {
1102  // If the register has just become live then other uses won't change
1103  // this fact anymore => decrement pressure.
1104  // If the register has just become dead then other uses make it come
1105  // back to life => increment pressure.
1106  bool Decrement = P.LaneMask.any();
1107 
1108  for (const VReg2SUnit &V2SU
1109  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1110  SUnit &SU = *V2SU.SU;
1111  if (SU.isScheduled || &SU == &ExitSU)
1112  continue;
1113 
1114  PressureDiff &PDiff = getPressureDiff(&SU);
1115  PDiff.addPressureChange(Reg, Decrement, &MRI);
1116  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1117  << printReg(Reg, TRI) << ':'
1118  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1119  dbgs() << " to "; PDiff.dump(*TRI););
1120  }
1121  } else {
1122  assert(P.LaneMask.any());
1123  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1124  // This may be called before CurrentBottom has been initialized. However,
1125  // BotRPTracker must have a valid position. We want the value live into the
1126  // instruction or live out of the block, so ask for the previous
1127  // instruction's live-out.
1128  const LiveInterval &LI = LIS->getInterval(Reg);
1129  VNInfo *VNI;
1130  MachineBasicBlock::const_iterator I =
1131  nextIfDebug(BotRPTracker.getPos(), BB->end());
1132  if (I == BB->end())
1133  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1134  else {
1135  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1136  VNI = LRQ.valueIn();
1137  }
1138  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1139  assert(VNI && "No live value at use.");
1140  for (const VReg2SUnit &V2SU
1141  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1142  SUnit *SU = V2SU.SU;
1143  // If this use comes before the reaching def, it cannot be a last use,
1144  // so decrease its pressure change.
1145  if (!SU->isScheduled && SU != &ExitSU) {
1146  LiveQueryResult LRQ =
1147  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1148  if (LRQ.valueIn() == VNI) {
1149  PressureDiff &PDiff = getPressureDiff(SU);
1150  PDiff.addPressureChange(Reg, true, &MRI);
1151  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1152  << *SU->getInstr();
1153  dbgs() << " to "; PDiff.dump(*TRI););
1154  }
1155  }
1156  }
1157  }
1158  }
1159 }
1160 
1161 void ScheduleDAGMILive::dump() const {
1162 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1163  if (EntrySU.getInstr() != nullptr)
1164  dumpNodeAll(EntrySU);
1165  for (const SUnit &SU : SUnits) {
1166  dumpNodeAll(SU);
1167  if (ShouldTrackPressure) {
1168  dbgs() << " Pressure Diff : ";
1169  getPressureDiff(&SU).dump(*TRI);
1170  }
1171  dbgs() << " Single Issue : ";
1172  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1173  SchedModel.mustEndGroup(SU.getInstr()))
1174  dbgs() << "true;";
1175  else
1176  dbgs() << "false;";
1177  dbgs() << '\n';
1178  }
1179  if (ExitSU.getInstr() != nullptr)
1180  dumpNodeAll(ExitSU);
1181 #endif
1182 }
1183 
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196  LLVM_DEBUG(SchedImpl->dumpPolicy());
1197  buildDAGWithRegPressure();
1198 
1199  postprocessDAG();
1200 
1201  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202  findRootsAndBiasEdges(TopRoots, BotRoots);
1203 
1204  // Initialize the strategy before modifying the DAG.
1205  // This may initialize a DFSResult to be used for queue priority.
1206  SchedImpl->initialize(this);
1207 
1208  LLVM_DEBUG(dump());
1209  if (PrintDAGs) dump();
1210  if (ViewMISchedDAGs) viewGraph();
1211 
1212  // Initialize ready queues now that the DAG and priority data are finalized.
1213  initQueues(TopRoots, BotRoots);
1214 
1215  bool IsTopNode = false;
1216  while (true) {
1217  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1218  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1219  if (!SU) break;
1220 
1221  assert(!SU->isScheduled && "Node already scheduled");
1222  if (!checkSchedLimit())
1223  break;
1224 
1225  scheduleMI(SU, IsTopNode);
1226 
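  // If subtree analysis was requested (see computeDFSResult), notify the
  // strategy the first time any node from a given subtree is scheduled.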
1227  if (DFSResult) {
1228  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1229  if (!ScheduledTrees.test(SubtreeID)) {
1230  ScheduledTrees.set(SubtreeID);
1231  DFSResult->scheduleTree(SubtreeID);
1232  SchedImpl->scheduleTree(SubtreeID);
1233  }
1234  }
1235 
1236  // Notify the scheduling strategy after updating the DAG.
1237  SchedImpl->schedNode(SU, IsTopNode);
1238 
1239  updateQueues(SU, IsTopNode);
1240  }
1241  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1242 
1243  placeDebugValues();
1244 
1245  LLVM_DEBUG({
1246  dbgs() << "*** Final schedule for "
1247  << printMBBReference(*begin()->getParent()) << " ***\n";
1248  dumpSchedule();
1249  dbgs() << '\n';
1250  });
1251 }
1252 
1253 /// Build the DAG and setup three register pressure trackers.
1254 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1255  if (!ShouldTrackPressure) {
1256  RPTracker.reset();
1257  RegionCriticalPSets.clear();
1258  buildSchedGraph(AA);
1259  return;
1260  }
1261 
1262  // Initialize the register pressure tracker used by buildSchedGraph.
1263  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1264  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1265 
1266  // Account for liveness generated by the region boundary.
1267  if (LiveRegionEnd != RegionEnd)
1268  RPTracker.recede();
1269 
1270  // Build the DAG, and compute current register pressure.
1271  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1272 
1273  // Initialize top/bottom trackers after computing region pressure.
1274  initRegPressure();
1275 }
1276 
1277 void ScheduleDAGMILive::computeDFSResult() {
1278  if (!DFSResult)
1279  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1280  DFSResult->clear();
1281  ScheduledTrees.clear();
1282  DFSResult->resize(SUnits.size());
1283  DFSResult->compute(SUnits);
1284  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1285 }
1286 
1287 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1288 /// only provides the critical path for single block loops. To handle loops that
1289 /// span blocks, we could use the vreg path latencies provided by
1290 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1291 /// available for use in the scheduler.
1292 ///
1293 /// The cyclic path estimation identifies a def-use pair that crosses the back
1294 /// edge and considers the depth and height of the nodes. For example, consider
1295 /// the following instruction sequence where each instruction has unit latency
1296 /// and defines an eponymous virtual register:
1297 ///
1298 /// a->b(a,c)->c(b)->d(c)->exit
1299 ///
1300 /// The cyclic critical path is two cycles: b->c->b
1301 /// The acyclic critical path is four cycles: a->b->c->d->exit
1302 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1303 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1304 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1305 /// LiveInDepth = depth(b) = len(a->b) = 1
1306 ///
1307 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1308 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1309 /// CyclicCriticalPath = min(2, 2) = 2
1310 ///
1311 /// This could be relevant to PostRA scheduling, but is currently implemented
1312 /// assuming LiveIntervals.
1313 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1314  // This only applies to a single-block loop.
1315  if (!BB->isSuccessor(BB))
1316  return 0;
1317 
1318  unsigned MaxCyclicLatency = 0;
1319  // Visit each live out vreg def to find def/use pairs that cross iterations.
1320  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1321  unsigned Reg = P.RegUnit;
1322  if (!TRI->isVirtualRegister(Reg))
1323  continue;
1324  const LiveInterval &LI = LIS->getInterval(Reg);
1325  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1326  if (!DefVNI)
1327  continue;
1328 
1329  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1330  const SUnit *DefSU = getSUnit(DefMI);
1331  if (!DefSU)
1332  continue;
1333 
1334  unsigned LiveOutHeight = DefSU->getHeight();
1335  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1336  // Visit all local users of the vreg def.
1337  for (const VReg2SUnit &V2SU
1338  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1339  SUnit *SU = V2SU.SU;
1340  if (SU == &ExitSU)
1341  continue;
1342 
1343  // Only consider uses of the phi.
1344  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1345  if (!LRQ.valueIn()->isPHIDef())
1346  continue;
1347 
1348  // Assume that a path spanning two iterations is a cycle, which could
1349  // overestimate in strange cases. This allows cyclic latency to be
1350  // estimated as the minimum slack of the vreg's depth or height.
1351  unsigned CyclicLatency = 0;
1352  if (LiveOutDepth > SU->getDepth())
1353  CyclicLatency = LiveOutDepth - SU->getDepth();
1354 
1355  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1356  if (LiveInHeight > LiveOutHeight) {
1357  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1358  CyclicLatency = LiveInHeight - LiveOutHeight;
1359  } else
1360  CyclicLatency = 0;
1361 
1362  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1363  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1364  if (CyclicLatency > MaxCyclicLatency)
1365  MaxCyclicLatency = CyclicLatency;
1366  }
1367  }
1368  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1369  return MaxCyclicLatency;
1370 }
1371 
1372 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1373 /// the Top RP tracker in case the region beginning has changed.
1374 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1375  ArrayRef<SUnit*> BotRoots) {
1376  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1377  if (ShouldTrackPressure) {
1378  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1379  TopRPTracker.setPos(CurrentTop);
1380  }
1381 }
1382 
1383 /// Move an instruction and update register pressure.
1384 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1385  // Move the instruction to its new location in the instruction stream.
1386  MachineInstr *MI = SU->getInstr();
1387 
1388  if (IsTopNode) {
1389  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1390  if (&*CurrentTop == MI)
1391  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1392  else {
1393  moveInstruction(MI, CurrentTop);
1394  TopRPTracker.setPos(MI);
1395  }
1396 
1397  if (ShouldTrackPressure) {
1398  // Update top scheduled pressure.
1399  RegisterOperands RegOpers;
1400  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1401  if (ShouldTrackLaneMasks) {
1402  // Adjust liveness and add missing dead+read-undef flags.
1403  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1404  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1405  } else {
1406  // Adjust for missing dead-def flags.
1407  RegOpers.detectDeadDefs(*MI, *LIS);
1408  }
1409 
1410  TopRPTracker.advance(RegOpers);
1411  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1412  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1413  TopRPTracker.getRegSetPressureAtPos(), TRI););
1414 
1415  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1416  }
1417  } else {
1418  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1419  MachineBasicBlock::iterator priorII =
1420  priorNonDebug(CurrentBottom, CurrentTop);
1421  if (&*priorII == MI)
1422  CurrentBottom = priorII;
1423  else {
1424  if (&*CurrentTop == MI) {
1425  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1426  TopRPTracker.setPos(CurrentTop);
1427  }
1428  moveInstruction(MI, CurrentBottom);
1429  CurrentBottom = MI;
1430  BotRPTracker.setPos(CurrentBottom);
1431  }
1432  if (ShouldTrackPressure) {
1433  RegisterOperands RegOpers;
1434  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1435  if (ShouldTrackLaneMasks) {
1436  // Adjust liveness and add missing dead+read-undef flags.
1437  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1438  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1439  } else {
1440  // Adjust for missing dead-def flags.
1441  RegOpers.detectDeadDefs(*MI, *LIS);
1442  }
1443 
1444  if (BotRPTracker.getPos() != CurrentBottom)
1445  BotRPTracker.recedeSkipDebugValues();
1446  SmallVector<RegisterMaskPair, 8> LiveUses;
1447  BotRPTracker.recede(RegOpers, &LiveUses);
1448  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1449  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1450  BotRPTracker.getRegSetPressureAtPos(), TRI););
1451 
1452  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1453  updatePressureDiffs(LiveUses);
1454  }
1455  }
1456 }
1457 
1458 //===----------------------------------------------------------------------===//
1459 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1460 //===----------------------------------------------------------------------===//
1461 
1462 namespace {
1463 
1464 /// Post-process the DAG to create cluster edges between neighboring
1465 /// loads or between neighboring stores.
1466 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1467  struct MemOpInfo {
1468  SUnit *SU;
1469  const MachineOperand *BaseOp;
1470  int64_t Offset;
1471 
1472  MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1473  : SU(su), BaseOp(Op), Offset(ofs) {}
1474 
1475  bool operator<(const MemOpInfo &RHS) const {
1476  if (BaseOp->getType() != RHS.BaseOp->getType())
1477  return BaseOp->getType() < RHS.BaseOp->getType();
1478 
1479  if (BaseOp->isReg())
1480  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1481  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1482  RHS.SU->NodeNum);
1483  if (BaseOp->isFI()) {
1484  const MachineFunction &MF =
1485  *BaseOp->getParent()->getParent()->getParent();
1486  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1487  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1488  TargetFrameLowering::StackGrowsDown;
1489  // Can't use tuple comparison here since we might need to use a
1490  // different order when the stack grows down.
1491  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1492  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1493  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1494 
1495  if (Offset != RHS.Offset)
1496  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1497 
1498  return SU->NodeNum < RHS.SU->NodeNum;
1499  }
1500 
1501  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1502  "index bases.");
1503  }
1504  };
1505 
1506  const TargetInstrInfo *TII;
1507  const TargetRegisterInfo *TRI;
1508  bool IsLoad;
1509 
1510 public:
1511  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1512  const TargetRegisterInfo *tri, bool IsLoad)
1513  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1514 
1515  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1516 
1517 protected:
1518  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1519 };
1520 
1521 class StoreClusterMutation : public BaseMemOpClusterMutation {
1522 public:
1523  StoreClusterMutation(const TargetInstrInfo *tii,
1524  const TargetRegisterInfo *tri)
1525  : BaseMemOpClusterMutation(tii, tri, false) {}
1526 };
1527 
1528 class LoadClusterMutation : public BaseMemOpClusterMutation {
1529 public:
1530  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1531  : BaseMemOpClusterMutation(tii, tri, true) {}
1532 };
1533 
1534 } // end anonymous namespace
1535 
1536 namespace llvm {
1537 
1538 std::unique_ptr<ScheduleDAGMutation>
1539 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1540  const TargetRegisterInfo *TRI) {
1541  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1542  : nullptr;
1543 }
1544 
1545 std::unique_ptr<ScheduleDAGMutation>
1546 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1547  const TargetRegisterInfo *TRI) {
1548  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1549  : nullptr;
1550 }
1551 
1552 } // end namespace llvm
1553 
1554 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1555  ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1556  SmallVector<MemOpInfo, 32> MemOpRecords;
1557  for (SUnit *SU : MemOps) {
1558  const MachineOperand *BaseOp;
1559  int64_t Offset;
1560  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1561  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1562  }
1563  if (MemOpRecords.size() < 2)
1564  return;
1565 
1566  llvm::sort(MemOpRecords);
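  // After sorting, operations that share a base register (or frame index) are
  // adjacent and ordered by offset, so a single linear pass can grow clusters
  // between neighboring records.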
1567  unsigned ClusterLength = 1;
1568  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1569  SUnit *SUa = MemOpRecords[Idx].SU;
1570  SUnit *SUb = MemOpRecords[Idx+1].SU;
1571  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1572  *MemOpRecords[Idx + 1].BaseOp,
1573  ClusterLength) &&
1574  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1575  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1576  << SUb->NodeNum << ")\n");
1577  // Copy successor edges from SUa to SUb. Interleaving computation
1578  // dependent on SUa can prevent load combining due to register reuse.
1579  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1580  // loads should have effectively the same inputs.
1581  for (const SDep &Succ : SUa->Succs) {
1582  if (Succ.getSUnit() == SUb)
1583  continue;
1584  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1585  << ")\n");
1586  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1587  }
1588  ++ClusterLength;
1589  } else
1590  ClusterLength = 1;
1591  }
1592 }
1593 
1594 /// Callback from DAG postProcessing to create cluster edges for loads.
1595 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1596  // Map DAG NodeNum to store chain ID.
1597  DenseMap<unsigned, unsigned> StoreChainIDs;
1598  // Map each store chain to a set of dependent MemOps.
1599  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1600  for (SUnit &SU : DAG->SUnits) {
1601  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1602  (!IsLoad && !SU.getInstr()->mayStore()))
1603  continue;
1604 
1605  unsigned ChainPredID = DAG->SUnits.size();
1606  for (const SDep &Pred : SU.Preds) {
1607  if (Pred.isCtrl()) {
1608  ChainPredID = Pred.getSUnit()->NodeNum;
1609  break;
1610  }
1611  }
1612  // Check if this chain-like pred has been seen
1613  // before. ChainPredID==MaxNodeID at the top of the schedule.
1614  unsigned NumChains = StoreChainDependents.size();
1615  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1616  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1617  if (Result.second)
1618  StoreChainDependents.resize(NumChains + 1);
1619  StoreChainDependents[Result.first->second].push_back(&SU);
1620  }
1621 
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChainDependents)
1624  clusterNeighboringMemOps(SCD, DAG);
1625 }
1626 
1627 //===----------------------------------------------------------------------===//
1628 // CopyConstrain - DAG post-processing to encourage copy elimination.
1629 //===----------------------------------------------------------------------===//
1630 
1631 namespace {
1632 
1633 /// Post-process the DAG to create weak edges from all uses of a copy to
1634 /// the one use that defines the copy's source vreg, most likely an induction
1635 /// variable increment.
1636 class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639 
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643 
1644 public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646 
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648 
1649 protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651 };
1652 
1653 } // end anonymous namespace
1654 
1655 namespace llvm {
1656 
1657 std::unique_ptr<ScheduleDAGMutation>
1658 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659  const TargetRegisterInfo *TRI) {
1660  return llvm::make_unique<CopyConstrain>(TII, TRI);
1661 }
1662 
1663 } // end namespace llvm
1664 
1665 /// constrainLocalCopy handles two possibilities:
1666 /// 1) Local src:
1667 /// I0: = dst
1668 /// I1: src = ...
1669 /// I2: = dst
1670 /// I3: dst = src (copy)
1671 /// (create pred->succ edges I0->I1, I2->I1)
1672 ///
1673 /// 2) Local copy:
1674 /// I0: dst = src (copy)
1675 /// I1: = dst
1676 /// I2: src = ...
1677 /// I3: = dst
1678 /// (create pred->succ edges I1->I2, I3->I2)
1679 ///
1680 /// Although the MachineScheduler is currently constrained to single blocks,
1681 /// this algorithm should handle extended blocks. An EBB is a set of
1682 /// contiguously numbered blocks such that the previous block in the EBB is
1683 /// always the single predecessor.
1684 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687 
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  unsigned SrcReg = SrcOp.getReg();
1691  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692  return;
1693 
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  unsigned DstReg = DstOp.getReg();
1696  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1697  return;
1698 
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709  LocalReg = DstReg;
1710  GlobalReg = SrcReg;
1711  LocalLI = &LIS->getInterval(LocalReg);
1712  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713  return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716 
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with them.
1723  if (GlobalSegment == GlobalLI->end())
1724  return;
1725 
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731  ++GlobalSegment;
1732 
1733  if (GlobalSegment == GlobalLI->end())
1734  return;
1735 
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738  // Two address defs have no hole.
1739  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740  GlobalSegment->start)) {
1741  return;
1742  }
1743  // If the prior global segment may be defined by the same two-address
1744  // instruction that also defines LocalLI, then we can't make a hole here.
1745  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746  LocalLI->beginIndex())) {
1747  return;
1748  }
1749  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750  // it would be a disconnected component in the live range.
1751  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752  "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756  return;
1757 
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760  return;
1761 
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770  continue;
1771  if (Succ.getSUnit() == GlobalSU)
1772  continue;
1773  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774  return;
1775  LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785  continue;
1786  if (Pred.getSUnit() == FirstLocalSU)
1787  continue;
1788  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789  return;
1790  GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1797  << GlobalSU->NodeNum << ")\n");
1798  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1803  << FirstLocalSU->NodeNum << ")\n");
1804  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806 }
1807 
1808 /// Callback from DAG postProcessing to create weak edges to encourage
1809 /// copy elimination.
1810 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813 
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816  return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819  *priorNonDebug(DAG->end(), DAG->begin()));
1820 
1821  for (SUnit &SU : DAG->SUnits) {
1822  if (!SU.getInstr()->isCopy())
1823  continue;
1824 
1825  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831 // and possibly other custom schedulers.
1832 //===----------------------------------------------------------------------===//
1833 
1834 static const unsigned InvalidCycle = ~0U;
1835 
1836 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837 
1838 /// Given a Count of resource usage and a Latency value, return true if a
1839 /// SchedBoundary becomes resource limited.
1840 /// If we are checking after scheduling a node, we should return true when
1841 /// we just reach the resource limit.
1842 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1843  unsigned Latency, bool AfterSchedNode) {
1844  int ResCntFactor = (int)(Count - (Latency * LFactor));
1845  if (AfterSchedNode)
1846  return ResCntFactor >= (int)LFactor;
1847  else
1848  return ResCntFactor > (int)LFactor;
1849 }
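
// (Editorial sketch, not part of MachineScheduler.cpp.) A self-contained
// illustration of the comparison above; the function name and the sample
// numbers are invented, only the arithmetic mirrors checkResourceLimit.
// With a latency factor of 2, a scheduled latency of 3 cycles scales to 6
// "latency units"; the zone counts as resource limited once the scaled
// resource Count pulls a full cycle (one factor) ahead of that.
#include <cassert>

static bool exampleResourceLimited(unsigned LFactor, unsigned Count,
                                   unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
  return AfterSchedNode ? ResCntFactor >= (int)LFactor
                        : ResCntFactor > (int)LFactor;
}

static void exampleResourceLimitUsage() {
  // Count == 8 is exactly one factor above Latency * LFactor == 6: after
  // scheduling a node this already counts as resource limited, but the same
  // numbers checked before scheduling do not.
  assert(exampleResourceLimited(/*LFactor=*/2, /*Count=*/8, /*Latency=*/3, true));
  assert(!exampleResourceLimited(/*LFactor=*/2, /*Count=*/8, /*Latency=*/3, false));
}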
1850 
1851 void SchedBoundary::reset() {
1852  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1853  // Destroying and reconstructing it is very expensive though. So keep
1854  // invalid, placeholder HazardRecs.
1855  if (HazardRec && HazardRec->isEnabled()) {
1856  delete HazardRec;
1857  HazardRec = nullptr;
1858  }
1859  Available.clear();
1860  Pending.clear();
1861  CheckPending = false;
1862  CurrCycle = 0;
1863  CurrMOps = 0;
1864  MinReadyCycle = std::numeric_limits<unsigned>::max();
1865  ExpectedLatency = 0;
1866  DependentLatency = 0;
1867  RetiredMOps = 0;
1868  MaxExecutedResCount = 0;
1869  ZoneCritResIdx = 0;
1870  IsResourceLimited = false;
1871  ReservedCycles.clear();
1872  ReservedCyclesIndex.clear();
1873 #ifndef NDEBUG
1874  // Track the maximum number of stall cycles that could arise either from the
1875  // latency of a DAG edge or the number of cycles that a processor resource is
1876  // reserved (SchedBoundary::ReservedCycles).
1877  MaxObservedStall = 0;
1878 #endif
1879  // Reserve a zero-count for invalid CritResIdx.
1880  ExecutedResCounts.resize(1);
1881  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1882 }
1883 
1884 void SchedRemainder::
1885 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1886  reset();
1887  if (!SchedModel->hasInstrSchedModel())
1888  return;
1889  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1890  for (SUnit &SU : DAG->SUnits) {
1891  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1892  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1893  * SchedModel->getMicroOpFactor();
1894  for (TargetSchedModel::ProcResIter
1895  PI = SchedModel->getWriteProcResBegin(SC),
1896  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1897  unsigned PIdx = PI->ProcResourceIdx;
1898  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1899  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1900  }
1901  }
1902 }
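
// (Editorial sketch, not part of MachineScheduler.cpp.) The loop above scales
// everything into a common "scaled count" unit so micro-op issue and each
// processor resource can be compared directly. The machine below is invented
// (issue width 4, a 2-unit FP divider, resource LCM 4); only the scaling rule
// mirrors the code above.
#include <cassert>

namespace sched_factor_example {
constexpr unsigned LatencyFactor = 4;  // the resource LCM
constexpr unsigned MicroOpFactor = 1;  // LCM / issue width (4 / 4)
constexpr unsigned FPDivFactor = 2;    // LCM / divider units (4 / 2)

inline void accumulate(unsigned &RemIssue, unsigned &RemFPDiv,
                       unsigned MicroOps, unsigned FPDivCycles) {
  RemIssue += MicroOps * MicroOpFactor;   // issue slots, scaled
  RemFPDiv += FPDivCycles * FPDivFactor;  // divider busy cycles, scaled
}

inline void example() {
  unsigned RemIssue = 0, RemFPDiv = 0;
  // Eight 1-uop instructions, two of which occupy a divider for 10 cycles.
  for (unsigned i = 0; i < 8; ++i)
    accumulate(RemIssue, RemFPDiv, /*MicroOps=*/1,
               /*FPDivCycles=*/(i < 2) ? 10 : 0);
  // Dividing a scaled count by LatencyFactor converts it back to cycles: the
  // 8 issue slots need 2 cycles, while the divider work needs 10 and dominates.
  assert(RemIssue / LatencyFactor == 2);
  assert(RemFPDiv / LatencyFactor == 10);
}
} // namespace sched_factor_example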
1903 
1904 void SchedBoundary::
1905 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1906  reset();
1907  DAG = dag;
1908  SchedModel = smodel;
1909  Rem = rem;
1910  if (SchedModel->hasInstrSchedModel()) {
1911  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1912  ReservedCyclesIndex.resize(ResourceCount);
1913  ExecutedResCounts.resize(ResourceCount);
1914  unsigned NumUnits = 0;
1915 
1916  for (unsigned i = 0; i < ResourceCount; ++i) {
1917  ReservedCyclesIndex[i] = NumUnits;
1918  NumUnits += SchedModel->getProcResource(i)->NumUnits;
1919  }
1920 
1921  ReservedCycles.resize(NumUnits, InvalidCycle);
1922  }
1923 }
1924 
1925 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1926 /// these "soft stalls" differently than the hard stall cycles based on CPU
1927 /// resources and computed by checkHazard(). A fully in-order model
1928 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1929 /// available for scheduling until they are ready. However, a weaker in-order
1930 /// model may use this for heuristics. For example, if a processor has in-order
1931 /// behavior when reading certain resources, this may come into play.
1932 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1933  if (!SU->isUnbuffered)
1934  return 0;
1935 
1936  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1937  if (ReadyCycle > CurrCycle)
1938  return ReadyCycle - CurrCycle;
1939  return 0;
1940 }
1941 
1942 /// Compute the next cycle at which the given processor resource unit
1943 /// can be scheduled.
1944 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1945  unsigned Cycles) {
1946  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1947  // If this resource has never been used, always return cycle zero.
1948  if (NextUnreserved == InvalidCycle)
1949  return 0;
1950  // For bottom-up scheduling add the cycles needed for the current operation.
1951  if (!isTop())
1952  NextUnreserved += Cycles;
1953  return NextUnreserved;
1954 }
1955 
1956 /// Compute the next cycle at which the given processor resource can be
1957 /// scheduled. Returns the next cycle and the index of the processor resource
1958 /// instance in the reserved cycles vector.
1959 std::pair<unsigned, unsigned>
1960 SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1961  unsigned MinNextUnreserved = InvalidCycle;
1962  unsigned InstanceIdx = 0;
1963  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1964  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1965  assert(NumberOfInstances > 0 &&
1966  "Cannot have zero instances of a ProcResource");
1967 
1968  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1969  ++I) {
1970  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1971  if (MinNextUnreserved > NextUnreserved) {
1972  InstanceIdx = I;
1973  MinNextUnreserved = NextUnreserved;
1974  }
1975  }
1976  return std::make_pair(MinNextUnreserved, InstanceIdx);
1977 }
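
// (Editorial sketch, not part of MachineScheduler.cpp.) The scan above simply
// picks the earliest-free instance of a multi-unit resource. A standalone
// model over a plain vector; the names are invented, only the min-scan
// mirrors getNextResourceCycle.
#include <cassert>
#include <utility>
#include <vector>

static std::pair<unsigned, unsigned>
exampleNextFreeInstance(const std::vector<unsigned> &NextFreeCycle) {
  unsigned MinCycle = ~0u, Idx = 0;
  for (unsigned I = 0, E = NextFreeCycle.size(); I < E; ++I) {
    if (NextFreeCycle[I] < MinCycle) {
      MinCycle = NextFreeCycle[I];
      Idx = I;
    }
  }
  return std::make_pair(MinCycle, Idx);
}

static void exampleNextFreeInstanceUsage() {
  // Three units of one resource, next free at cycles 7, 3 and 5: unit 1 wins.
  std::vector<unsigned> NextFree = {7, 3, 5};
  assert(exampleNextFreeInstance(NextFree) == std::make_pair(3u, 1u));
}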
1978 
1979 /// Does this SU have a hazard within the current instruction group.
1980 ///
1981 /// The scheduler supports two modes of hazard recognition. The first is the
1982 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1983 /// supports highly complicated in-order reservation tables
1984 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1985 ///
1986 /// The second is a streamlined mechanism that checks for hazards based on
1987 /// simple counters that the scheduler itself maintains. It explicitly checks
1988 /// for instruction dispatch limitations, including the number of micro-ops that
1989 /// can dispatch per cycle.
1990 ///
1991 /// TODO: Also check whether the SU must start a new group.
1992 bool SchedBoundary::checkHazard(SUnit *SU) {
1993  if (HazardRec->isEnabled()
1994  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1995  return true;
1996  }
1997 
1998  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1999  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2000  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2001  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2002  return true;
2003  }
2004 
2005  if (CurrMOps > 0 &&
2006  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2007  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2008  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2009  << (isTop() ? "begin" : "end") << " group\n");
2010  return true;
2011  }
2012 
2013  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2014  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2015  for (const MCWriteProcResEntry &PE :
2016  make_range(SchedModel->getWriteProcResBegin(SC),
2017  SchedModel->getWriteProcResEnd(SC))) {
2018  unsigned ResIdx = PE.ProcResourceIdx;
2019  unsigned Cycles = PE.Cycles;
2020  unsigned NRCycle, InstanceIdx;
2021  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2022  if (NRCycle > CurrCycle) {
2023 #ifndef NDEBUG
2024  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2025 #endif
2026  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2027  << SchedModel->getResourceName(ResIdx)
2028  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2029  << "=" << NRCycle << "c\n");
2030  return true;
2031  }
2032  }
2033  }
2034  return false;
2035 }
2036 
2037 // Find the unscheduled node in ReadySUs with the highest latency.
2038 unsigned SchedBoundary::
2039 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2040  SUnit *LateSU = nullptr;
2041  unsigned RemLatency = 0;
2042  for (SUnit *SU : ReadySUs) {
2043  unsigned L = getUnscheduledLatency(SU);
2044  if (L > RemLatency) {
2045  RemLatency = L;
2046  LateSU = SU;
2047  }
2048  }
2049  if (LateSU) {
2050  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2051  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2052  }
2053  return RemLatency;
2054 }
2055 
2056 // Count resources in this zone and the remaining unscheduled
2057 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2058 // resource index, or zero if the zone is issue limited.
2059 unsigned SchedBoundary::
2060 getOtherResourceCount(unsigned &OtherCritIdx) {
2061  OtherCritIdx = 0;
2062  if (!SchedModel->hasInstrSchedModel())
2063  return 0;
2064 
2065  unsigned OtherCritCount = Rem->RemIssueCount
2066  + (RetiredMOps * SchedModel->getMicroOpFactor());
2067  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2068  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2069  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2070  PIdx != PEnd; ++PIdx) {
2071  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2072  if (OtherCount > OtherCritCount) {
2073  OtherCritCount = OtherCount;
2074  OtherCritIdx = PIdx;
2075  }
2076  }
2077  if (OtherCritIdx) {
2078  LLVM_DEBUG(
2079  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2080  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2081  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2082  }
2083  return OtherCritCount;
2084 }
2085 
2086 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2087  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2088 
2089 #ifndef NDEBUG
2090  // ReadyCycle had been bumped up to CurrCycle when this node was
2091  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2092  // scheduling, so may now be greater than ReadyCycle.
2093  if (ReadyCycle > CurrCycle)
2094  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2095 #endif
2096 
2097  if (ReadyCycle < MinReadyCycle)
2098  MinReadyCycle = ReadyCycle;
2099 
2100  // Check for interlocks first. For the purpose of other heuristics, an
2101  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2102  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2103  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2104  Available.size() >= ReadyListLimit)
2105  Pending.push(SU);
2106  else
2107  Available.push(SU);
2108 }
2109 
2110 /// Move the boundary of scheduled code by one cycle.
2111 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2112  if (SchedModel->getMicroOpBufferSize() == 0) {
2113  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2114  "MinReadyCycle uninitialized");
2115  if (MinReadyCycle > NextCycle)
2116  NextCycle = MinReadyCycle;
2117  }
2118  // Update the current micro-ops, which will issue in the next cycle.
2119  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2120  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2121 
2122  // Decrement DependentLatency based on the next cycle.
2123  if ((NextCycle - CurrCycle) > DependentLatency)
2124  DependentLatency = 0;
2125  else
2126  DependentLatency -= (NextCycle - CurrCycle);
2127 
2128  if (!HazardRec->isEnabled()) {
2129  // Bypass HazardRec virtual calls.
2130  CurrCycle = NextCycle;
2131  } else {
2132  // Bypass getHazardType calls in case of long latency.
2133  for (; CurrCycle != NextCycle; ++CurrCycle) {
2134  if (isTop())
2135  HazardRec->AdvanceCycle();
2136  else
2137  HazardRec->RecedeCycle();
2138  }
2139  }
2140  CheckPending = true;
2141  IsResourceLimited =
2142  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2143  getScheduledLatency(), true);
2144 
2145  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2146  << '\n');
2147 }
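
// (Editorial sketch, not part of MachineScheduler.cpp.) The CurrMOps update
// above retires one full issue group per elapsed cycle. A tiny standalone
// model with an invented issue width of 4; only the saturating subtraction
// mirrors bumpCycle.
#include <cassert>

static unsigned exampleRemainingMOps(unsigned CurrMOps, unsigned IssueWidth,
                                     unsigned CurrCycle, unsigned NextCycle) {
  unsigned DecMOps = IssueWidth * (NextCycle - CurrCycle);
  return (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
}

static void exampleRemainingMOpsUsage() {
  // 6 micro-ops outstanding at issue width 4: bumping one cycle drains 4 of
  // them, bumping two cycles drains everything.
  assert(exampleRemainingMOps(6, 4, /*CurrCycle=*/10, /*NextCycle=*/11) == 2);
  assert(exampleRemainingMOps(6, 4, /*CurrCycle=*/10, /*NextCycle=*/12) == 0);
}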
2148 
2149 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2150  ExecutedResCounts[PIdx] += Count;
2151  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2152  MaxExecutedResCount = ExecutedResCounts[PIdx];
2153 }
2154 
2155 /// Add the given processor resource to this scheduled zone.
2156 ///
2157 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2158 /// during which this resource is consumed.
2159 ///
2160 /// \return the next cycle at which the instruction may execute without
2161 /// oversubscribing resources.
2162 unsigned SchedBoundary::
2163 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2164  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2165  unsigned Count = Factor * Cycles;
2166  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2167  << Cycles << "x" << Factor << "u\n");
2168 
2169  // Update Executed resources counts.
2170  incExecutedResources(PIdx, Count);
2171  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2172  Rem->RemainingCounts[PIdx] -= Count;
2173 
2174  // Check if this resource exceeds the current critical resource. If so, it
2175  // becomes the critical resource.
2176  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2177  ZoneCritResIdx = PIdx;
2178  LLVM_DEBUG(dbgs() << " *** Critical resource "
2179  << SchedModel->getResourceName(PIdx) << ": "
2180  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2181  << "c\n");
2182  }
2183  // For reserved resources, record the highest cycle using the resource.
2184  unsigned NextAvailable, InstanceIdx;
2185  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2186  if (NextAvailable > CurrCycle) {
2187  LLVM_DEBUG(dbgs() << " Resource conflict: "
2188  << SchedModel->getResourceName(PIdx)
2189  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2190  << " reserved until @" << NextAvailable << "\n");
2191  }
2192  return NextAvailable;
2193 }
2194 
2195 /// Move the boundary of scheduled code by one SUnit.
2196 void SchedBoundary::bumpNode(SUnit *SU) {
2197  // Update the reservation table.
2198  if (HazardRec->isEnabled()) {
2199  if (!isTop() && SU->isCall) {
2200  // Calls are scheduled with their preceding instructions. For bottom-up
2201  // scheduling, clear the pipeline state before emitting.
2202  HazardRec->Reset();
2203  }
2204  HazardRec->EmitInstruction(SU);
2205  // Scheduling an instruction may have made pending instructions available.
2206  CheckPending = true;
2207  }
2208  // checkHazard should prevent scheduling multiple instructions per cycle that
2209  // exceed the issue width.
2210  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2211  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2212  assert(
2213  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2214  "Cannot schedule this instruction's MicroOps in the current cycle.");
2215 
2216  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2217  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2218 
2219  unsigned NextCycle = CurrCycle;
2220  switch (SchedModel->getMicroOpBufferSize()) {
2221  case 0:
2222  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2223  break;
2224  case 1:
2225  if (ReadyCycle > NextCycle) {
2226  NextCycle = ReadyCycle;
2227  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2228  }
2229  break;
2230  default:
2231  // We don't currently model the OOO reorder buffer, so consider all
2232  // scheduled MOps to be "retired". We do loosely model in-order resource
2233  // latency. If this instruction uses an in-order resource, account for any
2234  // likely stall cycles.
2235  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2236  NextCycle = ReadyCycle;
2237  break;
2238  }
2239  RetiredMOps += IncMOps;
2240 
2241  // Update resource counts and critical resource.
2242  if (SchedModel->hasInstrSchedModel()) {
2243  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2244  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2245  Rem->RemIssueCount -= DecRemIssue;
2246  if (ZoneCritResIdx) {
2247  // Scale scheduled micro-ops for comparing with the critical resource.
2248  unsigned ScaledMOps =
2249  RetiredMOps * SchedModel->getMicroOpFactor();
2250 
2251  // If scaled micro-ops are now more than the previous critical resource by
2252  // a full cycle, then micro-ops issue becomes critical.
2253  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2254  >= (int)SchedModel->getLatencyFactor()) {
2255  ZoneCritResIdx = 0;
2256  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2257  << ScaledMOps / SchedModel->getLatencyFactor()
2258  << "c\n");
2259  }
2260  }
2261  for (TargetSchedModel::ProcResIter
2262  PI = SchedModel->getWriteProcResBegin(SC),
2263  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2264  unsigned RCycle =
2265  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2266  if (RCycle > NextCycle)
2267  NextCycle = RCycle;
2268  }
2269  if (SU->hasReservedResource) {
2270  // For reserved resources, record the highest cycle using the resource.
2271  // For top-down scheduling, this is the cycle in which we schedule this
2272  // instruction plus the number of cycles the operation reserves the
2273  // resource. For bottom-up, it is simply the instruction's cycle.
2274  for (TargetSchedModel::ProcResIter
2275  PI = SchedModel->getWriteProcResBegin(SC),
2276  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2277  unsigned PIdx = PI->ProcResourceIdx;
2278  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2279  unsigned ReservedUntil, InstanceIdx;
2280  std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2281  if (isTop()) {
2282  ReservedCycles[InstanceIdx] =
2283  std::max(ReservedUntil, NextCycle + PI->Cycles);
2284  } else
2285  ReservedCycles[InstanceIdx] = NextCycle;
2286  }
2287  }
2288  }
2289  }
2290  // Update ExpectedLatency and DependentLatency.
2291  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2292  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2293  if (SU->getDepth() > TopLatency) {
2294  TopLatency = SU->getDepth();
2295  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2296  << SU->NodeNum << ") " << TopLatency << "c\n");
2297  }
2298  if (SU->getHeight() > BotLatency) {
2299  BotLatency = SU->getHeight();
2300  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2301  << SU->NodeNum << ") " << BotLatency << "c\n");
2302  }
2303  // If we stall for any reason, bump the cycle.
2304  if (NextCycle > CurrCycle)
2305  bumpCycle(NextCycle);
2306  else
2307  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2308  // resource limited. If a stall occurred, bumpCycle does this.
2309  IsResourceLimited =
2310  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2311  getScheduledLatency(), true);
2312 
2313  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2314  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2315  // one cycle. Since we commonly reach the max MOps here, opportunistically
2316  // bump the cycle to avoid uselessly checking everything in the readyQ.
2317  CurrMOps += IncMOps;
2318 
2319  // Bump the cycle count for issue group constraints.
2320  // This must be done after NextCycle has been adjusted for all other stalls.
2321  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2322  // CurrCycle to X.
2323  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2324  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2325  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2326  << " group\n");
2327  bumpCycle(++NextCycle);
2328  }
2329 
2330  while (CurrMOps >= SchedModel->getIssueWidth()) {
2331  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2332  << CurrCycle << '\n');
2333  bumpCycle(++NextCycle);
2334  }
2335  LLVM_DEBUG(dumpScheduledState());
2336 }
2337 
2338 /// Release pending ready nodes into the available queue. This makes them
2339 /// visible to heuristics.
2340 void SchedBoundary::releasePending() {
2341  // If the available queue is empty, it is safe to reset MinReadyCycle.
2342  if (Available.empty())
2343  MinReadyCycle = std::numeric_limits<unsigned>::max();
2344 
2345  // Check to see if any of the pending instructions are ready to issue. If
2346  // so, add them to the available queue.
2347  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2348  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2349  SUnit *SU = *(Pending.begin()+i);
2350  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2351 
2352  if (ReadyCycle < MinReadyCycle)
2353  MinReadyCycle = ReadyCycle;
2354 
2355  if (!IsBuffered && ReadyCycle > CurrCycle)
2356  continue;
2357 
2358  if (checkHazard(SU))
2359  continue;
2360 
2361  if (Available.size() >= ReadyListLimit)
2362  break;
2363 
2364  Available.push(SU);
2365  Pending.remove(Pending.begin()+i);
2366  --i; --e;
2367  }
2368  CheckPending = false;
2369 }
2370 
2371 /// Remove SU from the ready set for this boundary.
2372 void SchedBoundary::removeReady(SUnit *SU) {
2373  if (Available.isInQueue(SU))
2374  Available.remove(Available.find(SU));
2375  else {
2376  assert(Pending.isInQueue(SU) && "bad ready count");
2377  Pending.remove(Pending.find(SU));
2378  }
2379 }
2380 
2381 /// If this queue only has one ready candidate, return it. As a side effect,
2382 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2383 /// one node is ready. If multiple instructions are ready, return NULL.
2384 SUnit *SchedBoundary::pickOnlyChoice() {
2385  if (CheckPending)
2386  releasePending();
2387 
2388  if (CurrMOps > 0) {
2389  // Defer any ready instrs that now have a hazard.
2390  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2391  if (checkHazard(*I)) {
2392  Pending.push(*I);
2393  I = Available.remove(I);
2394  continue;
2395  }
2396  ++I;
2397  }
2398  }
2399  for (unsigned i = 0; Available.empty(); ++i) {
2400 // FIXME: Re-enable assert once PR20057 is resolved.
2401 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2402 // "permanent hazard");
2403  (void)i;
2404  bumpCycle(CurrCycle + 1);
2405  releasePending();
2406  }
2407 
2408  LLVM_DEBUG(Pending.dump());
2409  LLVM_DEBUG(Available.dump());
2410 
2411  if (Available.size() == 1)
2412  return *Available.begin();
2413  return nullptr;
2414 }
2415 
2416 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2417 // This is useful information to dump after bumpNode.
2418 // Note that the Queue contents are more useful before pickNodeFromQueue.
2419 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2420  unsigned ResFactor;
2421  unsigned ResCount;
2422  if (ZoneCritResIdx) {
2423  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2424  ResCount = getResourceCount(ZoneCritResIdx);
2425  } else {
2426  ResFactor = SchedModel->getMicroOpFactor();
2427  ResCount = RetiredMOps * ResFactor;
2428  }
2429  unsigned LFactor = SchedModel->getLatencyFactor();
2430  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2431  << " Retired: " << RetiredMOps;
2432  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2433  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2434  << ResCount / ResFactor << " "
2435  << SchedModel->getResourceName(ZoneCritResIdx)
2436  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2437  << (IsResourceLimited ? " - Resource" : " - Latency")
2438  << " limited.\n";
2439 }
2440 #endif
2441 
2442 //===----------------------------------------------------------------------===//
2443 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2444 //===----------------------------------------------------------------------===//
2445 
2446 void GenericSchedulerBase::SchedCandidate::
2447 initResourceDelta(const ScheduleDAGMI *DAG,
2448  const TargetSchedModel *SchedModel) {
2449  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2450  return;
2451 
2452  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2453  for (TargetSchedModel::ProcResIter
2454  PI = SchedModel->getWriteProcResBegin(SC),
2455  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2456  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2457  ResDelta.CritResources += PI->Cycles;
2458  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2459  ResDelta.DemandedResources += PI->Cycles;
2460  }
2461 }
2462 
2463 /// Compute remaining latency. We need this both to determine whether the
2464 /// overall schedule has become latency-limited and whether the instructions
2465 /// outside this zone are resource or latency limited.
2466 ///
2467 /// The "dependent" latency is updated incrementally during scheduling as the
2468 /// max height/depth of scheduled nodes minus the cycles since it was
2469 /// scheduled:
2470 /// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2471 ///
2472 /// The "independent" latency is the max ready queue depth:
2473 /// ILat = max N.depth for N in Available|Pending
2474 ///
2475 /// RemainingLatency is the greater of independent and dependent latency.
2476 ///
2477 /// These computations are expensive, especially in DAGs with many edges, so
2478 /// only do them if necessary.
2479 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2480  unsigned RemLatency = CurrZone.getDependentLatency();
2481  RemLatency = std::max(RemLatency,
2482  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2483  RemLatency = std::max(RemLatency,
2484  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2485  return RemLatency;
2486 }
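
// (Editorial sketch, not part of MachineScheduler.cpp.) RemainingLatency is
// just the maximum of the zone's dependent latency and the unscheduled
// latency of every node still sitting in the Available and Pending queues.
// A standalone reduction over plain vectors; the names are invented.
#include <algorithm>
#include <cassert>
#include <vector>

static unsigned exampleRemLatency(unsigned DependentLatency,
                                  const std::vector<unsigned> &AvailableLat,
                                  const std::vector<unsigned> &PendingLat) {
  unsigned Rem = DependentLatency;
  for (unsigned L : AvailableLat)
    Rem = std::max(Rem, L);
  for (unsigned L : PendingLat)
    Rem = std::max(Rem, L);
  return Rem;
}

static void exampleRemLatencyUsage() {
  // A dependent latency of 5 cycles, but a pending node that still needs 9
  // cycles of work dominates the estimate.
  assert(exampleRemLatency(5, {3, 4}, {9, 2}) == 9);
}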
2487 
2488 /// Returns true if the current cycle plus remaining latency is greater than
2489 /// the critical path in the scheduling region.
2490 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2491  SchedBoundary &CurrZone,
2492  bool ComputeRemLatency,
2493  unsigned &RemLatency) const {
2494  // The current cycle is already greater than the critical path, so we are
2495  // already latency limited and don't need to compute the remaining latency.
2496  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2497  return true;
2498 
2499  // If we haven't scheduled anything yet, then we aren't latency limited.
2500  if (CurrZone.getCurrCycle() == 0)
2501  return false;
2502 
2503  if (ComputeRemLatency)
2504  RemLatency = computeRemLatency(CurrZone);
2505 
2506  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2507 }
2508 
2509 /// Set the CandPolicy for a scheduling zone given the current resources and
2510 /// latencies inside and outside the zone.
2511 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2512  SchedBoundary &CurrZone,
2513  SchedBoundary *OtherZone) {
2514  // Apply preemptive heuristics based on the total latency and resources
2515  // inside and outside this zone. Potential stalls should be considered before
2516  // following this policy.
2517 
2518  // Compute the critical resource outside the zone.
2519  unsigned OtherCritIdx = 0;
2520  unsigned OtherCount =
2521  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2522 
2523  bool OtherResLimited = false;
2524  unsigned RemLatency = 0;
2525  bool RemLatencyComputed = false;
2526  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2527  RemLatency = computeRemLatency(CurrZone);
2528  RemLatencyComputed = true;
2529  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2530  OtherCount, RemLatency, false);
2531  }
2532 
2533  // Schedule aggressively for latency in PostRA mode. We don't check for
2534  // acyclic latency during PostRA, and highly out-of-order processors will
2535  // skip PostRA scheduling.
2536  if (!OtherResLimited &&
2537  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2538  RemLatency))) {
2539  Policy.ReduceLatency |= true;
2540  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2541  << " RemainingLatency " << RemLatency << " + "
2542  << CurrZone.getCurrCycle() << "c > CritPath "
2543  << Rem.CriticalPath << "\n");
2544  }
2545  // If the same resource is limiting inside and outside the zone, do nothing.
2546  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2547  return;
2548 
2549  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2550  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2551  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2552  } if (OtherResLimited) dbgs()
2553  << " RemainingLimit: "
2554  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2555  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2556  << " Latency limited both directions.\n");
2557 
2558  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2559  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2560 
2561  if (OtherResLimited)
2562  Policy.DemandResIdx = OtherCritIdx;
2563 }
2564 
2565 #ifndef NDEBUG
2566 const char *GenericSchedulerBase::getReasonStr(
2567  GenericSchedulerBase::CandReason Reason) {
2568  switch (Reason) {
2569  case NoCand: return "NOCAND ";
2570  case Only1: return "ONLY1 ";
2571  case PhysReg: return "PHYS-REG ";
2572  case RegExcess: return "REG-EXCESS";
2573  case RegCritical: return "REG-CRIT ";
2574  case Stall: return "STALL ";
2575  case Cluster: return "CLUSTER ";
2576  case Weak: return "WEAK ";
2577  case RegMax: return "REG-MAX ";
2578  case ResourceReduce: return "RES-REDUCE";
2579  case ResourceDemand: return "RES-DEMAND";
2580  case TopDepthReduce: return "TOP-DEPTH ";
2581  case TopPathReduce: return "TOP-PATH ";
2582  case BotHeightReduce:return "BOT-HEIGHT";
2583  case BotPathReduce: return "BOT-PATH ";
2584  case NextDefUse: return "DEF-USE ";
2585  case NodeOrder: return "ORDER ";
2586  };
2587  llvm_unreachable("Unknown reason!");
2588 }
2589 
2590 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2591  PressureChange P;
2592  unsigned ResIdx = 0;
2593  unsigned Latency = 0;
2594  switch (Cand.Reason) {
2595  default:
2596  break;
2597  case RegExcess:
2598  P = Cand.RPDelta.Excess;
2599  break;
2600  case RegCritical:
2601  P = Cand.RPDelta.CriticalMax;
2602  break;
2603  case RegMax:
2604  P = Cand.RPDelta.CurrentMax;
2605  break;
2606  case ResourceReduce:
2607  ResIdx = Cand.Policy.ReduceResIdx;
2608  break;
2609  case ResourceDemand:
2610  ResIdx = Cand.Policy.DemandResIdx;
2611  break;
2612  case TopDepthReduce:
2613  Latency = Cand.SU->getDepth();
2614  break;
2615  case TopPathReduce:
2616  Latency = Cand.SU->getHeight();
2617  break;
2618  case BotHeightReduce:
2619  Latency = Cand.SU->getHeight();
2620  break;
2621  case BotPathReduce:
2622  Latency = Cand.SU->getDepth();
2623  break;
2624  }
2625  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2626  if (P.isValid())
2627  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2628  << ":" << P.getUnitInc() << " ";
2629  else
2630  dbgs() << " ";
2631  if (ResIdx)
2632  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2633  else
2634  dbgs() << " ";
2635  if (Latency)
2636  dbgs() << " " << Latency << " cycles ";
2637  else
2638  dbgs() << " ";
2639  dbgs() << '\n';
2640 }
2641 #endif
2642 
2643 namespace llvm {
2644 /// Return true if this heuristic determines order.
2645 bool tryLess(int TryVal, int CandVal,
2646  GenericSchedulerBase::SchedCandidate &TryCand,
2647  GenericSchedulerBase::SchedCandidate &Cand,
2648  GenericSchedulerBase::CandReason Reason) {
2649  if (TryVal < CandVal) {
2650  TryCand.Reason = Reason;
2651  return true;
2652  }
2653  if (TryVal > CandVal) {
2654  if (Cand.Reason > Reason)
2655  Cand.Reason = Reason;
2656  return true;
2657  }
2658  return false;
2659 }
2660 
2661 bool tryGreater(int TryVal, int CandVal,
2662  GenericSchedulerBase::SchedCandidate &TryCand,
2663  GenericSchedulerBase::SchedCandidate &Cand,
2664  GenericSchedulerBase::CandReason Reason) {
2665  if (TryVal > CandVal) {
2666  TryCand.Reason = Reason;
2667  return true;
2668  }
2669  if (TryVal < CandVal) {
2670  if (Cand.Reason > Reason)
2671  Cand.Reason = Reason;
2672  return true;
2673  }
2674  return false;
2675 }
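
// (Editorial sketch, not part of MachineScheduler.cpp.) How callers use the
// tryLess/tryGreater protocol: each helper returns true as soon as one
// candidate wins at the current heuristic level, and the caller stops there.
// TryCand's reason is only stamped when TryCand wins, which is what the
// schedulers later test against NoCand. The enum and driver below are
// invented stand-ins; only the win/lose/tie convention mirrors the code above.
#include <cassert>

namespace try_protocol_example {
enum Reason { NoCand, Stall, Order }; // smaller value == higher priority

struct Cand {
  unsigned StallCycles = 0;
  unsigned NodeNum = 0;
  Reason R = NoCand;
};

// Mirrors tryLess: prefer the smaller value, report a decision either way.
inline bool exLess(int TryVal, int CandVal, Cand &Try, Cand &C, Reason Why) {
  if (TryVal < CandVal) { Try.R = Why; return true; }
  if (TryVal > CandVal) { if (C.R > Why) C.R = Why; return true; }
  return false; // tie: fall through to the next heuristic
}

// A two-level comparison in the spirit of tryCandidate.
inline void pickBetter(Cand &C, Cand &Try) {
  if (exLess(Try.StallCycles, C.StallCycles, Try, C, Stall))
    return; // decided by stall cycles
  if (Try.NodeNum < C.NodeNum)
    Try.R = Order; // fall back to original instruction order
}

inline void example() {
  Cand Current, Try;
  Current.StallCycles = 2; Current.NodeNum = 5;
  Try.StallCycles = 0;     Try.NodeNum = 9;
  pickBetter(Current, Try);
  assert(Try.R == Stall); // Try wins; a caller would now call setBest().
}
} // namespace try_protocol_example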
2676 
2677 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2678  GenericSchedulerBase::SchedCandidate &Cand,
2679  SchedBoundary &Zone) {
2680  if (Zone.isTop()) {
2681  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2682  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2683  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2684  return true;
2685  }
2686  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2687  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2688  return true;
2689  } else {
2690  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2691  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2692  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2693  return true;
2694  }
2695  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2696  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2697  return true;
2698  }
2699  return false;
2700 }
2701 } // end namespace llvm
2702 
2703 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2704  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2705  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2706 }
2707 
2708 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2709  tracePick(Cand.Reason, Cand.AtTop);
2710 }
2711 
2712 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2713  assert(dag->hasVRegLiveness() &&
2714  "(PreRA)GenericScheduler needs vreg liveness");
2715  DAG = static_cast<ScheduleDAGMILive*>(dag);
2716  SchedModel = DAG->getSchedModel();
2717  TRI = DAG->TRI;
2718 
2719  Rem.init(DAG, SchedModel);
2720  Top.init(DAG, SchedModel, &Rem);
2721  Bot.init(DAG, SchedModel, &Rem);
2722 
2723  // Initialize resource counts.
2724 
2725  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2726  // are disabled, then these HazardRecs will be disabled.
2727  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2728  if (!Top.HazardRec) {
2729  Top.HazardRec =
2730  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2731  Itin, DAG);
2732  }
2733  if (!Bot.HazardRec) {
2734  Bot.HazardRec =
2735  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2736  Itin, DAG);
2737  }
2738  TopCand.SU = nullptr;
2739  BotCand.SU = nullptr;
2740 }
2741 
2742 /// Initialize the per-region scheduling policy.
2743 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2744  MachineBasicBlock::iterator End,
2745  unsigned NumRegionInstrs) {
2746  const MachineFunction &MF = *Begin->getMF();
2747  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2748 
2749  // Avoid setting up the register pressure tracker for small regions to save
2750  // compile time. As a rough heuristic, only track pressure when the number of
2751  // schedulable instructions exceeds half the integer register file.
2752  RegionPolicy.ShouldTrackPressure = true;
2753  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2754  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2755  if (TLI->isTypeLegal(LegalIntVT)) {
2756  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2757  TLI->getRegClassFor(LegalIntVT));
2758  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2759  }
2760  }
2761 
2762  // For generic targets, we default to bottom-up, because it's simpler and more
2763  // compile-time optimizations have been implemented in that direction.
2764  RegionPolicy.OnlyBottomUp = true;
2765 
2766  // Allow the subtarget to override default policy.
2767  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2768 
2769  // After subtarget overrides, apply command line options.
2770  if (!EnableRegPressure) {
2771  RegionPolicy.ShouldTrackPressure = false;
2772  RegionPolicy.ShouldTrackLaneMasks = false;
2773  }
2774 
2775  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2776  // e.g. -misched-bottomup=false allows scheduling in both directions.
2778  "-misched-topdown incompatible with -misched-bottomup");
2779  if (ForceBottomUp.getNumOccurrences() > 0) {
2780  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2781  if (RegionPolicy.OnlyBottomUp)
2782  RegionPolicy.OnlyTopDown = false;
2783  }
2784  if (ForceTopDown.getNumOccurrences() > 0) {
2785  RegionPolicy.OnlyTopDown = ForceTopDown;
2786  if (RegionPolicy.OnlyTopDown)
2787  RegionPolicy.OnlyBottomUp = false;
2788  }
2789 }
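
// (Editorial sketch, not part of MachineScheduler.cpp.) The pressure-tracking
// decision above reduces to a simple threshold: only pay for the register
// pressure tracker when the region is larger than half the widest legal
// integer register file. Standalone model with invented numbers.
#include <cassert>

static bool exampleShouldTrackPressure(unsigned NumRegionInstrs,
                                       unsigned NumAllocatableIntRegs) {
  return NumRegionInstrs > NumAllocatableIntRegs / 2;
}

static void exampleShouldTrackPressureUsage() {
  // With 30 allocatable integer registers, a 12-instruction region skips the
  // tracker while a 40-instruction region enables it.
  assert(!exampleShouldTrackPressure(12, 30));
  assert(exampleShouldTrackPressure(40, 30));
}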
2790 
2791 void GenericScheduler::dumpPolicy() const {
2792  // Cannot completely remove virtual function even in release mode.
2793 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2794  dbgs() << "GenericScheduler RegionPolicy: "
2795  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2796  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2797  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2798  << "\n";
2799 #endif
2800 }
2801 
2802 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2803 /// critical path by more cycles than it takes to drain the instruction buffer.
2804 /// We estimate an upper bound on in-flight instructions as:
2805 ///
2806 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2807 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2808 /// InFlightResources = InFlightIterations * LoopResources
2809 ///
2810 /// TODO: Check execution resources in addition to IssueCount.
2811 void GenericScheduler::checkAcyclicLatency() {
2812  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2813  return;
2814 
2815  // Scaled number of cycles per loop iteration.
2816  unsigned IterCount =
2817  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2818  Rem.RemIssueCount);
2819  // Scaled acyclic critical path.
2820  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2821  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2822  unsigned InFlightCount =
2823  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2824  unsigned BufferLimit =
2825  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2826 
2827  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2828 
2829  LLVM_DEBUG(
2830  dbgs() << "IssueCycles="
2831  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2832  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2833  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2834  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2835  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2836  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2837 }
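
// (Editorial sketch, not part of MachineScheduler.cpp.) A worked instance of
// the estimate above with invented numbers, assuming LatencyFactor == 1 and
// MicroOpFactor == 1 so the scaled counts read directly as cycles and
// micro-ops.
#include <cassert>

static bool exampleAcyclicLatencyLimited(unsigned CyclicCritPath,
                                         unsigned AcyclicCritPath,
                                         unsigned LoopMicroOps,
                                         unsigned MicroOpBufferSize) {
  // Cycles per iteration: the cyclic critical path or the issue count,
  // whichever is larger (both already in the same scaled units here).
  unsigned IterCount =
      CyclicCritPath > LoopMicroOps ? CyclicCritPath : LoopMicroOps;
  // Micro-ops that must be in flight to cover the acyclic path, rounded up.
  unsigned InFlight =
      (AcyclicCritPath * LoopMicroOps + IterCount - 1) / IterCount;
  return InFlight > MicroOpBufferSize;
}

static void exampleAcyclicLatencyUsage() {
  // A 4-cycle recurrence, a 24-cycle acyclic path and 8 micro-ops per
  // iteration need roughly 24 / 8 * 8 == 24 micro-ops in flight, which
  // overflows a 16-entry micro-op buffer, so the loop is latency limited.
  assert(exampleAcyclicLatencyLimited(4, 24, 8, 16));
  // A 32-entry buffer is deep enough to hide the acyclic path.
  assert(!exampleAcyclicLatencyLimited(4, 24, 8, 32));
}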
2838 
2839 void GenericScheduler::registerRoots() {
2840  Rem.CriticalPath = DAG->ExitSU.getDepth();
2841 
2842  // Some roots may not feed into ExitSU. Check all of them in case.
2843  for (const SUnit *SU : Bot.Available) {
2844  if (SU->getDepth() > Rem.CriticalPath)
2845  Rem.CriticalPath = SU->getDepth();
2846  }
2847  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2848  if (DumpCriticalPathLength) {
2849  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2850  }
2851 
2852  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2853  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2854  checkAcyclicLatency();
2855  }
2856 }
2857 
2858 namespace llvm {
2859 bool tryPressure(const PressureChange &TryP,
2860  const PressureChange &CandP,
2861  GenericSchedulerBase::SchedCandidate &TryCand,
2862  GenericSchedulerBase::SchedCandidate &Cand,
2863  GenericSchedulerBase::CandReason Reason,
2864  const TargetRegisterInfo *TRI,
2865  const MachineFunction &MF) {
2866  // If one candidate decreases and the other increases, go with it.
2867  // Invalid candidates have UnitInc==0.
2868  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2869  Reason)) {
2870  return true;
2871  }
2872  // Do not compare the magnitude of pressure changes between top and bottom
2873  // boundary.
2874  if (Cand.AtTop != TryCand.AtTop)
2875  return false;
2876 
2877  // If both candidates affect the same set in the same boundary, go with the
2878  // smallest increase.
2879  unsigned TryPSet = TryP.getPSetOrMax();
2880  unsigned CandPSet = CandP.getPSetOrMax();
2881  if (TryPSet == CandPSet) {
2882  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2883  Reason);
2884  }
2885 
2886  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2887  std::numeric_limits<int>::max();
2888 
2889  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2890  std::numeric_limits<int>::max();
2891 
2892  // If the candidates are decreasing pressure, reverse priority.
2893  if (TryP.getUnitInc() < 0)
2894  std::swap(TryRank, CandRank);
2895  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2896 }
2897 
2898 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2899  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2900 }
2901 
2902 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2903 /// their physreg def/use.
2904 ///
2905 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2906 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2907 /// with the operation that produces or consumes the physreg. We'll do this when
2908 /// regalloc has support for parallel copies.
2909 int biasPhysReg(const SUnit *SU, bool isTop) {
2910  const MachineInstr *MI = SU->getInstr();
2911 
2912  if (MI->isCopy()) {
2913  unsigned ScheduledOper = isTop ? 1 : 0;
2914  unsigned UnscheduledOper = isTop ? 0 : 1;
2915  // If we have already scheduled the physreg produce/consumer, immediately
2916  // schedule the copy.
2917  if (TargetRegisterInfo::isPhysicalRegister(
2918  MI->getOperand(ScheduledOper).getReg()))
2919  return 1;
2920  // If the physreg is at the boundary, defer it. Otherwise schedule it
2921  // immediately to free the dependent. We can hoist the copy later.
2922  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2923  if (TargetRegisterInfo::isPhysicalRegister(
2924  MI->getOperand(UnscheduledOper).getReg()))
2925  return AtBoundary ? -1 : 1;
2926  }
2927 
2928  if (MI->isMoveImmediate()) {
2929  // If we have a move immediate and all successors have been assigned, bias
2930  // towards scheduling this later. Make sure all register defs are to
2931  // physical registers.
2932  bool DoBias = true;
2933  for (const MachineOperand &Op : MI->defs()) {
2934  if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
2935  DoBias = false;
2936  break;
2937  }
2938  }
2939 
2940  if (DoBias)
2941  return isTop ? -1 : 1;
2942  }
2943 
2944  return 0;
2945 }
2946 } // end namespace llvm
2947 
2948 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2949  bool AtTop,
2950  const RegPressureTracker &RPTracker,
2951  RegPressureTracker &TempTracker) {
2952  Cand.SU = SU;
2953  Cand.AtTop = AtTop;
2954  if (DAG->isTrackingPressure()) {
2955  if (AtTop) {
2956  TempTracker.getMaxDownwardPressureDelta(
2957  Cand.SU->getInstr(),
2958  Cand.RPDelta,
2959  DAG->getRegionCriticalPSets(),
2960  DAG->getRegPressure().MaxSetPressure);
2961  } else {
2962  if (VerifyScheduling) {
2963  TempTracker.getMaxUpwardPressureDelta(
2964  Cand.SU->getInstr(),
2965  &DAG->getPressureDiff(Cand.SU),
2966  Cand.RPDelta,
2967  DAG->getRegionCriticalPSets(),
2968  DAG->getRegPressure().MaxSetPressure);
2969  } else {
2970  RPTracker.getUpwardPressureDelta(
2971  Cand.SU->getInstr(),
2972  DAG->getPressureDiff(Cand.SU),
2973  Cand.RPDelta,
2974  DAG->getRegionCriticalPSets(),
2975  DAG->getRegPressure().MaxSetPressure);
2976  }
2977  }
2978  }
2979  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2980  << " Try SU(" << Cand.SU->NodeNum << ") "
2981  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2982  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2983 }
2984 
2985 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2986 /// hierarchical. This may be more efficient than a graduated cost model because
2987 /// we don't need to evaluate all aspects of the model for each node in the
2988 /// queue. But it's really done to make the heuristics easier to debug and
2989 /// statistically analyze.
2990 ///
2991 /// \param Cand provides the policy and current best candidate.
2992 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2993 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2994 // if Cand is from a different zone than TryCand.
2995 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2996  SchedCandidate &TryCand,
2997  SchedBoundary *Zone) const {
2998  // Initialize the candidate if needed.
2999  if (!Cand.isValid()) {
3000  TryCand.Reason = NodeOrder;
3001  return;
3002  }
3003 
3004  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3005  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3006  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3007  return;
3008 
3009  // Avoid exceeding the target's limit.
3010  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3011  Cand.RPDelta.Excess,
3012  TryCand, Cand, RegExcess, TRI,
3013  DAG->MF))
3014  return;
3015 
3016  // Avoid increasing the max critical pressure in the scheduled region.
3017  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3018  Cand.RPDelta.CriticalMax,
3019  TryCand, Cand, RegCritical, TRI,
3020  DAG->MF))
3021  return;
3022 
3023  // We only compare a subset of features when comparing nodes between
3024  // Top and Bottom boundary. Some properties are simply incomparable, in many
3025  // other instances we should only override the other boundary if something
3026  // is a clear good pick on one boundary. Skip heuristics that are more
3027  // "tie-breaking" in nature.
3028  bool SameBoundary = Zone != nullptr;
3029  if (SameBoundary) {
3030  // For loops that are acyclic path limited, aggressively schedule for
3031  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3032  // heuristics to take precedence.
3033  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3034  tryLatency(TryCand, Cand, *Zone))
3035  return;
3036 
3037  // Prioritize instructions that read unbuffered resources by stall cycles.
3038  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3039  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3040  return;
3041  }
3042 
3043  // Keep clustered nodes together to encourage downstream peephole
3044  // optimizations which may reduce resource requirements.
3045  //
3046  // This is a best effort to set things up for a post-RA pass. Optimizations
3047  // like generating loads of multiple registers should ideally be done within
3048  // the scheduler pass by combining the loads during DAG postprocessing.
3049  const SUnit *CandNextClusterSU =
3050  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3051  const SUnit *TryCandNextClusterSU =
3052  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3053  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3054  Cand.SU == CandNextClusterSU,
3055  TryCand, Cand, Cluster))
3056  return;
3057 
3058  if (SameBoundary) {
3059  // Weak edges are for clustering and other constraints.
3060  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3061  getWeakLeft(Cand.SU, Cand.AtTop),
3062  TryCand, Cand, Weak))
3063  return;
3064  }
3065 
3066  // Avoid increasing the max pressure of the entire region.
3067  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3068  Cand.RPDelta.CurrentMax,
3069  TryCand, Cand, RegMax, TRI,
3070  DAG->MF))
3071  return;
3072 
3073  if (SameBoundary) {
3074  // Avoid critical resource consumption and balance the schedule.
3075  TryCand.initResourceDelta(DAG, SchedModel);
3076  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3077  TryCand, Cand, ResourceReduce))
3078  return;
3079  if (tryGreater(TryCand.ResDelta.DemandedResources,
3080  Cand.ResDelta.DemandedResources,
3081  TryCand, Cand, ResourceDemand))
3082  return;
3083 
3084  // Avoid serializing long latency dependence chains.
3085  // For acyclic path limited loops, latency was already checked above.
3086  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3087  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3088  return;
3089 
3090  // Fall through to original instruction order.
3091  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3092  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3093  TryCand.Reason = NodeOrder;
3094  }
3095  }
3096 }
3097 
3098 /// Pick the best candidate from the queue.
3099 ///
3100 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3101 /// DAG building. To adjust for the current scheduling location we need to
3102 /// maintain the number of vreg uses remaining to be top-scheduled.
3103 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3104  const CandPolicy &ZonePolicy,
3105  const RegPressureTracker &RPTracker,
3106  SchedCandidate &Cand) {
3107  // getMaxPressureDelta temporarily modifies the tracker.
3108  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3109 
3110  ReadyQueue &Q = Zone.Available;
3111  for (SUnit *SU : Q) {
3112 
3113  SchedCandidate TryCand(ZonePolicy);
3114  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3115  // Pass SchedBoundary only when comparing nodes from the same boundary.
3116  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3117  tryCandidate(Cand, TryCand, ZoneArg);
3118  if (TryCand.Reason != NoCand) {
3119  // Initialize resource delta if needed in case future heuristics query it.
3120  if (TryCand.ResDelta == SchedResourceDelta())
3121  TryCand.initResourceDelta(DAG, SchedModel);
3122  Cand.setBest(TryCand);
3123  LLVM_DEBUG(traceCandidate(Cand));
3124  }
3125  }
3126 }
3127 
3128 /// Pick the best candidate node from either the top or bottom queue.
3129 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3130  // Schedule as far as possible in the direction of no choice. This is most
3131  // efficient, but also provides the best heuristics for CriticalPSets.
3132  if (SUnit *SU = Bot.pickOnlyChoice()) {
3133  IsTopNode = false;
3134  tracePick(Only1, false);
3135  return SU;
3136  }
3137  if (SUnit *SU = Top.pickOnlyChoice()) {
3138  IsTopNode = true;
3139  tracePick(Only1, true);
3140  return SU;
3141  }
3142  // Set the bottom-up policy based on the state of the current bottom zone and
3143  // the instructions outside the zone, including the top zone.
3144  CandPolicy BotPolicy;
3145  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3146  // Set the top-down policy based on the state of the current top zone and
3147  // the instructions outside the zone, including the bottom zone.
3148  CandPolicy TopPolicy;
3149  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3150 
3151  // See if BotCand is still valid (because we previously scheduled from Top).
3152  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3153  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3154  BotCand.Policy != BotPolicy) {
3155  BotCand.reset(CandPolicy());
3156  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3157  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3158  } else {
3159  LLVM_DEBUG(traceCandidate(BotCand));
3160 #ifndef NDEBUG
3161  if (VerifyScheduling) {
3162  SchedCandidate TCand;
3163  TCand.reset(CandPolicy());
3164  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3165  assert(TCand.SU == BotCand.SU &&
3166  "Last pick result should correspond to re-picking right now");
3167  }
3168 #endif
3169  }
3170 
3171  // Check if the top Q has a better candidate.
3172  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3173  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3174  TopCand.Policy != TopPolicy) {
3175  TopCand.reset(CandPolicy());
3176  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3177  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3178  } else {
3179  LLVM_DEBUG(traceCandidate(TopCand));
3180 #ifndef NDEBUG
3181  if (VerifyScheduling) {
3182  SchedCandidate TCand;
3183  TCand.reset(CandPolicy());
3184  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3185  assert(TCand.SU == TopCand.SU &&
3186  "Last pick result should correspond to re-picking right now");
3187  }
3188 #endif
3189  }
3190 
3191  // Pick best from BotCand and TopCand.
3192  assert(BotCand.isValid());
3193  assert(TopCand.isValid());
3194  SchedCandidate Cand = BotCand;
3195  TopCand.Reason = NoCand;
3196  tryCandidate(Cand, TopCand, nullptr);
3197  if (TopCand.Reason != NoCand) {
3198  Cand.setBest(TopCand);
3199  LLVM_DEBUG(traceCandidate(Cand));
3200  }
3201 
3202  IsTopNode = Cand.AtTop;
3203  tracePick(Cand);
3204  return Cand.SU;
3205 }
3206 
3207 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3208 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3209  if (DAG->top() == DAG->bottom()) {
3210  assert(Top.Available.empty() && Top.Pending.empty() &&
3211  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3212  return nullptr;
3213  }
3214  SUnit *SU;
3215  do {
3216  if (RegionPolicy.OnlyTopDown) {
3217  SU = Top.pickOnlyChoice();
3218  if (!SU) {
3219  CandPolicy NoPolicy;
3220  TopCand.reset(NoPolicy);
3221  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3222  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3223  tracePick(TopCand);
3224  SU = TopCand.SU;
3225  }
3226  IsTopNode = true;
3227  } else if (RegionPolicy.OnlyBottomUp) {
3228  SU = Bot.pickOnlyChoice();
3229  if (!SU) {
3230  CandPolicy NoPolicy;
3231  BotCand.reset(NoPolicy);
3232  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3233  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3234  tracePick(BotCand);
3235  SU = BotCand.SU;
3236  }
3237  IsTopNode = false;
3238  } else {
3239  SU = pickNodeBidirectional(IsTopNode);
3240  }
3241  } while (SU->isScheduled);
3242 
3243  if (SU->isTopReady())
3244  Top.removeReady(SU);
3245  if (SU->isBottomReady())
3246  Bot.removeReady(SU);
3247 
3248  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3249  << *SU->getInstr());
3250  return SU;
3251 }
3252 
3253 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3254  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3255  if (!isTop)
3256  ++InsertPos;
3257  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3258 
3259  // Find already scheduled copies with a single physreg dependence and move
3260  // them just above the scheduled instruction.
3261  for (SDep &Dep : Deps) {
3262  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3263  continue;
3264  SUnit *DepSU = Dep.getSUnit();
3265  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3266  continue;
3267  MachineInstr *Copy = DepSU->getInstr();
3268  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3269  continue;
3270  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3271  DAG->dumpNode(*Dep.getSUnit()));
3272  DAG->moveInstruction(Copy, InsertPos);
3273  }
3274 }
3275 
3276 /// Update the scheduler's state after scheduling a node. This is the same node
3277 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3278 /// update its state based on the current cycle before MachineSchedStrategy
3279 /// does.
3280 ///
3281 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3282 /// them here. See comments in biasPhysReg.
3283 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3284  if (IsTopNode) {
3285  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3286  Top.bumpNode(SU);
3287  if (SU->hasPhysRegUses)
3288  reschedulePhysReg(SU, true);
3289  } else {
3290  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3291  Bot.bumpNode(SU);
3292  if (SU->hasPhysRegDefs)
3293  reschedulePhysReg(SU, false);
3294  }
3295 }
3296 
3297 /// Create the standard converging machine scheduler. This will be used as the
3298 /// default scheduler if the target does not set a default.
3299 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3300  ScheduleDAGMILive *DAG =
3301  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3302  // Register DAG post-processors.
3303  //
3304  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3305  // data and pass it to later mutations. Have a single mutation that gathers
3306  // the interesting nodes in one pass.
3307  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3308  return DAG;
3309 }
3310 
3311 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3312  return createGenericSchedLive(C);
3313 }
3314 
3315 static MachineSchedRegistry
3316 GenericSchedRegistry("converge", "Standard converging scheduler.",
3317  createConveringSched);
3318 
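// ----------------------------------------------------------------------------
// [Editor's illustration, not part of MachineScheduler.cpp] The registry entry
// above is what makes a strategy selectable by name (the "converge", "ilpmax",
// "ilpmin" and "shuffle" entries in this file all follow the same pattern, and
// the -misched option chooses among them). A hypothetical out-of-tree strategy
// could be hooked up the same way; every name below is invented for
// illustration and simply reuses GenericScheduler:
//
//   static ScheduleDAGInstrs *createMyExampleSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MyExampleRegistry("my-example", "Illustrative custom scheduler.",
//                     createMyExampleSched);
// ----------------------------------------------------------------------------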
3319 //===----------------------------------------------------------------------===//
3320 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3321 //===----------------------------------------------------------------------===//
3322 
3323 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3324  DAG = Dag;
3325  SchedModel = DAG->getSchedModel();
3326  TRI = DAG->TRI;
3327 
3328  Rem.init(DAG, SchedModel);
3329  Top.init(DAG, SchedModel, &Rem);
3330  BotRoots.clear();
3331 
3332  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3333  // or are disabled, then these HazardRecs will be disabled.
3334  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3335  if (!Top.HazardRec) {
3336  Top.HazardRec =
3337  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3338  Itin, DAG);
3339  }
3340 }
3341 
3342 void PostGenericScheduler::registerRoots() {
3343  Rem.CriticalPath = DAG->ExitSU.getDepth();
3344 
3345  // Some roots may not feed into ExitSU. Check all of them in case.
3346  for (const SUnit *SU : BotRoots) {
3347  if (SU->getDepth() > Rem.CriticalPath)
3348  Rem.CriticalPath = SU->getDepth();
3349  }
3350  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3351  if (DumpCriticalPathLength) {
3352  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3353  }
3354 }
3355 
3356 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3357 ///
3358 /// \param Cand provides the policy and current best candidate.
3359 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3360 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3361  SchedCandidate &TryCand) {
3362  // Initialize the candidate if needed.
3363  if (!Cand.isValid()) {
3364  TryCand.Reason = NodeOrder;
3365  return;
3366  }
3367 
3368  // Prioritize instructions that read unbuffered resources by stall cycles.
3369  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3370  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3371  return;
3372 
3373  // Keep clustered nodes together.
3374  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3375  Cand.SU == DAG->getNextClusterSucc(),
3376  TryCand, Cand, Cluster))
3377  return;
3378 
3379  // Avoid critical resource consumption and balance the schedule.
3380  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3381  TryCand, Cand, ResourceReduce))
3382  return;
3383  if (tryGreater(TryCand.ResDelta.DemandedResources,
3384  Cand.ResDelta.DemandedResources,
3385  TryCand, Cand, ResourceDemand))
3386  return;
3387 
3388  // Avoid serializing long latency dependence chains.
3389  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3390  return;
3391  }
3392 
3393  // Fall through to original instruction order.
3394  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3395  TryCand.Reason = NodeOrder;
3396 }
3397 
3398 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3399  ReadyQueue &Q = Top.Available;
3400  for (SUnit *SU : Q) {
3401  SchedCandidate TryCand(Cand.Policy);
3402  TryCand.SU = SU;
3403  TryCand.AtTop = true;
3404  TryCand.initResourceDelta(DAG, SchedModel);
3405  tryCandidate(Cand, TryCand);
3406  if (TryCand.Reason != NoCand) {
3407  Cand.setBest(TryCand);
3408  LLVM_DEBUG(traceCandidate(Cand));
3409  }
3410  }
3411 }
3412 
3413 /// Pick the next node to schedule.
3414 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3415  if (DAG->top() == DAG->bottom()) {
3416  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3417  return nullptr;
3418  }
3419  SUnit *SU;
3420  do {
3421  SU = Top.pickOnlyChoice();
3422  if (SU) {
3423  tracePick(Only1, true);
3424  } else {
3425  CandPolicy NoPolicy;
3426  SchedCandidate TopCand(NoPolicy);
3427  // Set the top-down policy based on the state of the current top zone and
3428  // the instructions outside the zone, including the bottom zone.
3429  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3430  pickNodeFromQueue(TopCand);
3431  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3432  tracePick(TopCand);
3433  SU = TopCand.SU;
3434  }
3435  } while (SU->isScheduled);
3436 
3437  IsTopNode = true;
3438  Top.removeReady(SU);
3439 
3440  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3441  << *SU->getInstr());
3442  return SU;
3443 }
3444 
3445 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3446 /// scheduled/remaining flags in the DAG nodes.
3447 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3448  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3449  Top.bumpNode(SU);
3450 }
3451 
3452 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3453  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3454  /*RemoveKillFlags=*/true);
3455 }
3456 
3457 //===----------------------------------------------------------------------===//
3458 // ILP Scheduler. Currently for experimental analysis of heuristics.
3459 //===----------------------------------------------------------------------===//
3460 
3461 namespace {
3462 
3463 /// Order nodes by the ILP metric.
3464 struct ILPOrder {
3465  const SchedDFSResult *DFSResult = nullptr;
3466  const BitVector *ScheduledTrees = nullptr;
3467  bool MaximizeILP;
3468 
3469  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3470 
3471  /// Apply a less-than relation on node priority.
3472  ///
3473  /// (Return true if A comes after B in the Q.)
3474  bool operator()(const SUnit *A, const SUnit *B) const {
3475  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3476  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3477  if (SchedTreeA != SchedTreeB) {
3478  // Unscheduled trees have lower priority.
3479  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3480  return ScheduledTrees->test(SchedTreeB);
3481 
3482  // Trees with shallower connections have lower priority.
3483  if (DFSResult->getSubtreeLevel(SchedTreeA)
3484  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3485  return DFSResult->getSubtreeLevel(SchedTreeA)
3486  < DFSResult->getSubtreeLevel(SchedTreeB);
3487  }
3488  }
3489  if (MaximizeILP)
3490  return DFSResult->getILP(A) < DFSResult->getILP(B);
3491  else
3492  return DFSResult->getILP(A) > DFSResult->getILP(B);
3493  }
3494 };
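// ----------------------------------------------------------------------------
// [Editor's illustration, not part of MachineScheduler.cpp] ILPOrder follows
// the standard heap-comparator convention: operator() returns true when A
// should come *after* B, so the make_heap/pop_heap calls in ILPScheduler below
// surface the highest-priority node first. A tiny standalone example with
// plain integers standing in for ILP values:
//
//   #include <algorithm>
//   #include <vector>
//   int main() {
//     std::vector<int> ReadyQ = {3, 7, 1};
//     auto Cmp = [](int A, int B) { return A < B; }; // true => A comes after B
//     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
//     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); // moves 7 to the back
//     return ReadyQ.back() == 7 ? 0 : 1;                // highest "ILP" picked
//   }
// ----------------------------------------------------------------------------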
3495 
3496 /// Schedule based on the ILP metric.
3497 class ILPScheduler : public MachineSchedStrategy {
3498  ScheduleDAGMILive *DAG = nullptr;
3499  ILPOrder Cmp;
3500 
3501  std::vector<SUnit*> ReadyQ;
3502 
3503 public:
3504  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3505 
3506  void initialize(ScheduleDAGMI *dag) override {
3507  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3508  DAG = static_cast<ScheduleDAGMILive*>(dag);
3509  DAG->computeDFSResult();
3510  Cmp.DFSResult = DAG->getDFSResult();
3511  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3512  ReadyQ.clear();
3513  }
3514 
3515  void registerRoots() override {
3516  // Restore the heap in ReadyQ with the updated DFS results.
3517  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3518  }
3519 
3520  /// Implement MachineSchedStrategy interface.
3521  /// -----------------------------------------
3522 
3523  /// Callback to select the highest priority node from the ready Q.
3524  SUnit *pickNode(bool &IsTopNode) override {
3525  if (ReadyQ.empty()) return nullptr;
3526  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3527  SUnit *SU = ReadyQ.back();
3528  ReadyQ.pop_back();
3529  IsTopNode = false;
3530  LLVM_DEBUG(dbgs() << "Pick node "
3531  << "SU(" << SU->NodeNum << ") "
3532  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3533  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3534  << " @"
3535  << DAG->getDFSResult()->getSubtreeLevel(
3536  DAG->getDFSResult()->getSubtreeID(SU))
3537  << '\n'
3538  << "Scheduling " << *SU->getInstr());
3539  return SU;
3540  }
3541 
3542  /// Scheduler callback to notify that a new subtree is scheduled.
3543  void scheduleTree(unsigned SubtreeID) override {
3544  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3545  }
3546 
3547  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3548  /// DFSResults, and resort the priority Q.
3549  void schedNode(SUnit *SU, bool IsTopNode) override {
3550  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3551  }
3552 
3553  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3554 
3555  void releaseBottomNode(SUnit *SU) override {
3556  ReadyQ.push_back(SU);
3557  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3558  }
3559 };
3560 
3561 } // end anonymous namespace
3562 
3563 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3564  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3565 }
3566 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3567  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3568 }
3569 
3570 static MachineSchedRegistry ILPMaxRegistry(
3571  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3572 static MachineSchedRegistry ILPMinRegistry(
3573  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3574 
3575 //===----------------------------------------------------------------------===//
3576 // Machine Instruction Shuffler for Correctness Testing
3577 //===----------------------------------------------------------------------===//
3578 
3579 #ifndef NDEBUG
3580 namespace {
3581 
3582 /// Apply a less-than relation on the node order, which corresponds to the
3583 /// instruction order prior to scheduling. IsReverse implements greater-than.
3584 template<bool IsReverse>
3585 struct SUnitOrder {
3586  bool operator()(SUnit *A, SUnit *B) const {
3587  if (IsReverse)
3588  return A->NodeNum > B->NodeNum;
3589  else
3590  return A->NodeNum < B->NodeNum;
3591  }
3592 };
3593 
3594 /// Reorder instructions as much as possible.
3595 class InstructionShuffler : public MachineSchedStrategy {
3596  bool IsAlternating;
3597  bool IsTopDown;
3598 
3599  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3600  // gives nodes with a higher number higher priority causing the latest
3601  // instructions to be scheduled first.
3602  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3603  TopQ;
3604 
3605  // When scheduling bottom-up, use greater-than as the queue priority.
3606  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3607  BottomQ;
3608 
3609 public:
3610  InstructionShuffler(bool alternate, bool topdown)
3611  : IsAlternating(alternate), IsTopDown(topdown) {}
3612 
3613  void initialize(ScheduleDAGMI*) override {
3614  TopQ.clear();
3615  BottomQ.clear();
3616  }
3617 
3618  /// Implement MachineSchedStrategy interface.
3619  /// -----------------------------------------
3620 
3621  SUnit *pickNode(bool &IsTopNode) override {
3622  SUnit *SU;
3623  if (IsTopDown) {
3624  do {
3625  if (TopQ.empty()) return nullptr;
3626  SU = TopQ.top();
3627  TopQ.pop();
3628  } while (SU->isScheduled);
3629  IsTopNode = true;
3630  } else {
3631  do {
3632  if (BottomQ.empty()) return nullptr;
3633  SU = BottomQ.top();
3634  BottomQ.pop();
3635  } while (SU->isScheduled);
3636  IsTopNode = false;
3637  }
3638  if (IsAlternating)
3639  IsTopDown = !IsTopDown;
3640  return SU;
3641  }
3642 
3643  void schedNode(SUnit *SU, bool IsTopNode) override {}
3644 
3645  void releaseTopNode(SUnit *SU) override {
3646  TopQ.push(SU);
3647  }
3648  void releaseBottomNode(SUnit *SU) override {
3649  BottomQ.push(SU);
3650  }
3651 };
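// ----------------------------------------------------------------------------
// [Editor's illustration, not part of MachineScheduler.cpp] As the TopQ comment
// above notes, a less-than comparator in a priority queue yields the *largest*
// key first, so SUnitOrder<false> hands out the highest-numbered (latest)
// instructions first when shuffling top-down. A standalone sketch with a
// simplified node type:
//
//   #include <queue>
//   #include <vector>
//   struct Node { unsigned NodeNum; };
//   struct LessNodeNum {
//     bool operator()(const Node &A, const Node &B) const {
//       return A.NodeNum < B.NodeNum;                 // less-than => max first
//     }
//   };
//   int main() {
//     std::priority_queue<Node, std::vector<Node>, LessNodeNum> TopQ;
//     TopQ.push({1}); TopQ.push({7}); TopQ.push({3});
//     return TopQ.top().NodeNum == 7 ? 0 : 1;         // latest node comes first
//   }
// ----------------------------------------------------------------------------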
3652 
3653 } // end anonymous namespace
3654 
3655 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3656  bool Alternate = !ForceTopDown && !ForceBottomUp;
3657  bool TopDown = !ForceBottomUp;
3658  assert((TopDown || !ForceTopDown) &&
3659  "-misched-topdown incompatible with -misched-bottomup");
3660  return new ScheduleDAGMILive(
3661  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3662 }
3663 
3664 static MachineSchedRegistry ShufflerRegistry(
3665  "shuffle", "Shuffle machine instructions alternating directions",
3666  createInstructionShuffler);
3667 #endif // !NDEBUG
3668 
3669 //===----------------------------------------------------------------------===//
3670 // GraphWriter support for ScheduleDAGMILive.
3671 //===----------------------------------------------------------------------===//
3672 
3673 #ifndef NDEBUG
3674 namespace llvm {
3675 
3676 template<> struct GraphTraits<
3677  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3678 
3679 template<>
3680 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3681 
3682  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3683  static std::string getGraphName(const ScheduleDAG *G) {
3684  return G->MF.getName();
3685  }
3686 
3687  static bool renderGraphFromBottomUp() {
3688  return true;
3689  }
3690 
3691  static bool isNodeHidden(const SUnit *Node) {
3692  if (ViewMISchedCutoff == 0)
3693  return false;
3694  return (Node->Preds.size() > ViewMISchedCutoff
3695  || Node->Succs.size() > ViewMISchedCutoff);
3696  }
3697 
3698  /// If you want to override the dot attributes printed for a particular
3699  /// edge, override this method.
3700  static std::string getEdgeAttributes(const SUnit *Node,
3701  SUnitIterator EI,
3702  const ScheduleDAG *Graph) {
3703  if (EI.isArtificialDep())
3704  return "color=cyan,style=dashed";
3705  if (EI.isCtrlDep())
3706  return "color=blue,style=dashed";
3707  return "";
3708  }
3709 
3710  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3711  std::string Str;
3712  raw_string_ostream SS(Str);
3713  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3714  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3715  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3716  SS << "SU:" << SU->NodeNum;
3717  if (DFS)
3718  SS << " I:" << DFS->getNumInstrs(SU);
3719  return SS.str();
3720  }
3721 
3722  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3723  return G->getGraphNodeLabel(SU);
3724  }
3725 
3726  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3727  std::string Str("shape=Mrecord");
3728  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3729  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3730  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3731  if (DFS) {
3732  Str += ",style=filled,fillcolor=\"#";
3733  Str += DOT::getColorString(DFS->getSubtreeID(N));
3734  Str += '"';
3735  }
3736  return Str;
3737  }
3738 };
3739 
3740 } // end namespace llvm
3741 #endif // NDEBUG
3742 
3743 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3744 /// rendered using 'dot'.
3745 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3746 #ifndef NDEBUG
3747  ViewGraph(this, Name, false, Title);
3748 #else
3749  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3750  << "systems with Graphviz or gv!\n";
3751 #endif // NDEBUG
3752 }
3753 
3754 /// Out-of-line implementation with no arguments is handy for gdb.
3755 void ScheduleDAGMI::viewGraph() {
3756  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3757 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:118
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:77
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:473
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Segments::iterator iterator
Definition: LiveInterval.h:207
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:637
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned getReg() const
getReg - Returns the register number.
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:139
bool test(unsigned Idx) const
Definition: BitVector.h:501
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:449
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:460
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:701
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:366
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const RegPressureTracker & getBotRPTracker() const
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:269
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:211
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Result of a LiveRange query.
Definition: LiveInterval.h:89
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
PressureDiff & getPressureDiff(const SUnit *SU)
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:328
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:271
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:268
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:446
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:104
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:528
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:821
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:379
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Helpers for implementing custom MachineSchedStrategy classes.
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
static const unsigned MinSubtreeSize
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:393
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:482
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:498
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1122
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:270
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles)
Compute the next cycle at which the given processor resource unit can be scheduled.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
LiveInterval & getInterval(unsigned Reg)
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::pair< unsigned, unsigned > getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:416
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:140
Representation of each machine instruction.
Definition: MachineInstr.h:63
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:652
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
iterator begin()
Definition: LiveInterval.h:210
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:808
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:372
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:505
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions before register allocation.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool getMemOperandWithOffset(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
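An illustrative use in the spirit of the load/store clustering mutation; SUa and SUb are assumed adjacent memory SUnits, and the +4 offset check is only an example:
const MachineOperand *BaseA = nullptr, *BaseB = nullptr;
int64_t OffA = 0, OffB = 0;
if (TII->getMemOperandWithOffset(*SUa->getInstr(), BaseA, OffA, TRI) &&
    TII->getMemOperandWithOffset(*SUb->getInstr(), BaseB, OffB, TRI) &&
    BaseA->isIdenticalTo(*BaseB) && OffB == OffA + 4) {
  // Same base, adjacent offsets: a weak "cluster" edge would encourage the
  // scheduler to keep these two accesses back to back.
}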
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants to be adjacent.
Definition: ScheduleDAG.h:206
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits methods.
Machine Instruction Scheduler
IRTranslator LLVM IR MI
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false)
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register ...
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
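A minimal sketch of the usual way a target hooks in a mutation, assuming a hypothetical MyTargetPassConfig; createLoadClusterDAGMutation is the clustering helper declared in llvm/CodeGen/MachineScheduler.h:
ScheduleDAGInstrs *
MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Post-process the DAG with load clustering before the strategy runs.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}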
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:655
void resize(size_type N)
Definition: SmallVector.h:344