LLVM  10.0.0svn
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/MachineScheduler.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph; provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
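// Added note (not part of the original source): the cl::opt flags above are
// consumed from the command line of tools such as llc, or forwarded from
// clang via -mllvm. A hypothetical invocation, with placeholder file names,
// might look like:
//
//   llc -misched-topdown -misched-limit=128 foo.ll
//   clang -O2 -mllvm -verify-misched -c foo.c
//
// Options guarded by #ifndef NDEBUG (e.g. -misched-cutoff) are only available
// in builds with assertions enabled.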
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
140 MachineSchedContext::MachineSchedContext() {
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
144 MachineSchedContext::~MachineSchedContext() {
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
196 char &llvm::MachineSchedulerID = MachineScheduler::ID;
197 
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205  "Machine Instruction Scheduler", false, false)
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
208  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
213  AU.addRequired<MachineDominatorTree>();
214  AU.addRequired<MachineLoopInfo>();
215  AU.addRequired<AAResultsWrapperPass>();
216  AU.addRequired<TargetPassConfig>();
217  AU.addRequired<SlotIndexes>();
218  AU.addPreserved<SlotIndexes>();
219  AU.addRequired<LiveIntervals>();
220  AU.addPreserved<LiveIntervals>();
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
226 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
227 
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
232  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
237  AU.addRequired<MachineDominatorTree>();
238  AU.addRequired<MachineLoopInfo>();
239  AU.addRequired<TargetPassConfig>();
240  MachineFunctionPass::getAnalysisUsage(AU);
241 }
242 
245 
246 /// A dummy default scheduler factory indicates whether the scheduler
247 /// is overridden on the command line.
248 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
249  return nullptr;
250 }
251 
252 /// MachineSchedOpt allows command line selection of the scheduler.
253 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
254  RegisterPassParser<MachineSchedRegistry>>
255 MachineSchedOpt("misched",
256  cl::init(&useDefaultMachineSched), cl::Hidden,
257  cl::desc("Machine instruction scheduler to use"));
258 
259 static MachineSchedRegistry
260 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
261  useDefaultMachineSched);
262 
263 static cl::opt<bool> EnableMachineSched(
264  "enable-misched",
265  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
266  cl::Hidden);
267 
268 static cl::opt<bool> EnablePostRAMachineSched(
269  "enable-post-misched",
270  cl::desc("Enable the post-ra machine instruction scheduling pass."),
271  cl::init(true), cl::Hidden);
272 
273 /// Decrement this iterator until reaching the top or a non-debug instr.
274 static MachineBasicBlock::const_iterator
275 priorNonDebug(MachineBasicBlock::const_iterator I,
276  MachineBasicBlock::const_iterator Beg) {
277  assert(I != Beg && "reached the top of the region, cannot decrement");
278  while (--I != Beg) {
279  if (!I->isDebugInstr())
280  break;
281  }
282  return I;
283 }
284 
285 /// Non-const version.
286 static MachineBasicBlock::iterator
287 priorNonDebug(MachineBasicBlock::iterator I,
288  MachineBasicBlock::const_iterator Beg) {
289  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
290  .getNonConstIterator();
291 }
292 
293 /// If this iterator is a debug value, increment until reaching the End or a
294 /// non-debug instruction.
295 static MachineBasicBlock::const_iterator
296 nextIfDebug(MachineBasicBlock::const_iterator I,
297  MachineBasicBlock::const_iterator End) {
298  for (; I != End; ++I) {
299  if (!I->isDebugInstr())
300  break;
301  }
302  return I;
303 }
304 
305 /// Non-const version.
306 static MachineBasicBlock::iterator
307 nextIfDebug(MachineBasicBlock::iterator I,
308  MachineBasicBlock::const_iterator End) {
309  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
310  .getNonConstIterator();
311 }
312 
313 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
314 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
315  // Select the scheduler, or set the default.
316  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
317  if (Ctor != useDefaultMachineSched)
318  return Ctor(this);
319 
320  // Get the default scheduler set by the target for this function.
321  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
322  if (Scheduler)
323  return Scheduler;
324 
325  // Default to GenericScheduler.
326  return createGenericSchedLive(this);
327 }
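// Added sketch (not part of the original source): a target or plugin can make
// its own strategy selectable through the -misched option handled above by
// registering a factory with MachineSchedRegistry. The names below are
// hypothetical placeholders.
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Example strategy registration",
//                   createMySched);
//
// After registration, -misched=my-sched causes the Ctor path above to be
// taken instead of the target default.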
328 
329 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
330 /// the caller. We don't have a command line option to override the postRA
331 /// scheduler. The Target must configure it.
332 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
333  // Get the postRA scheduler set by the target for this function.
334  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
335  if (Scheduler)
336  return Scheduler;
337 
338  // Default to GenericScheduler.
339  return createGenericSchedPostRA(this);
340 }
341 
342 /// Top-level MachineScheduler pass driver.
343 ///
344 /// Visit blocks in function order. Divide each block into scheduling regions
345 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
346 /// consistent with the DAG builder, which traverses the interior of the
347 /// scheduling regions bottom-up.
348 ///
349 /// This design avoids exposing scheduling boundaries to the DAG builder,
350 /// simplifying the DAG builder's support for "special" target instructions.
351 /// At the same time the design allows target schedulers to operate across
352 /// scheduling boundaries, for example to bundle the boundary instructions
353 /// without reordering them. This creates complexity, because the target
354 /// scheduler must update the RegionBegin and RegionEnd positions cached by
355 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
356 /// design would be to split blocks at scheduling boundaries, but LLVM has a
357 /// general bias against block splitting purely for implementation simplicity.
358 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
359  if (skipFunction(mf.getFunction()))
360  return false;
361 
362  if (EnableMachineSched.getNumOccurrences()) {
363  if (!EnableMachineSched)
364  return false;
365  } else if (!mf.getSubtarget().enableMachineScheduler())
366  return false;
367 
368  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
369 
370  // Initialize the context of the pass.
371  MF = &mf;
372  MLI = &getAnalysis<MachineLoopInfo>();
373  MDT = &getAnalysis<MachineDominatorTree>();
374  PassConfig = &getAnalysis<TargetPassConfig>();
375  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
376 
377  LIS = &getAnalysis<LiveIntervals>();
378 
379  if (VerifyScheduling) {
380  LLVM_DEBUG(LIS->dump());
381  MF->verify(this, "Before machine scheduling.");
382  }
383  RegClassInfo->runOnMachineFunction(*MF);
384 
385  // Instantiate the selected scheduler for this target, function, and
386  // optimization level.
387  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
388  scheduleRegions(*Scheduler, false);
389 
390  LLVM_DEBUG(LIS->dump());
391  if (VerifyScheduling)
392  MF->verify(this, "After machine scheduling.");
393  return true;
394 }
395 
396 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
397  if (skipFunction(mf.getFunction()))
398  return false;
399 
400  if (EnablePostRAMachineSched.getNumOccurrences()) {
401  if (!EnablePostRAMachineSched)
402  return false;
403  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
404  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
405  return false;
406  }
407  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
408 
409  // Initialize the context of the pass.
410  MF = &mf;
411  MLI = &getAnalysis<MachineLoopInfo>();
412  PassConfig = &getAnalysis<TargetPassConfig>();
413 
414  if (VerifyScheduling)
415  MF->verify(this, "Before post machine scheduling.");
416 
417  // Instantiate the selected scheduler for this target, function, and
418  // optimization level.
419  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
420  scheduleRegions(*Scheduler, true);
421 
422  if (VerifyScheduling)
423  MF->verify(this, "After post machine scheduling.");
424  return true;
425 }
426 
427 /// Return true if the given instruction should not be included in a scheduling
428 /// region.
429 ///
430 /// MachineScheduler does not currently support scheduling across calls. To
431 /// handle calls, the DAG builder needs to be modified to create register
432 /// anti/output dependencies on the registers clobbered by the call's regmask
433 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
434 /// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
435 /// the boundary, but there would be no benefit to postRA scheduling across
436 /// calls this late anyway.
437 static bool isSchedBoundary(MachineInstr *MI,
438  MachineBasicBlock *MBB,
439  MachineFunction *MF,
440  const TargetInstrInfo *TII) {
441  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
442 }
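// Added example (not part of the original source): because calls are
// boundaries, a block such as
//
//   %a = ...
//   %b = use %a
//   call @fn        <- boundary
//   %c = ...
//
// is split by getSchedRegions() below into two regions, one covering %a/%b
// and one covering %c; the call itself is never moved by the scheduler.
// The instruction names are placeholders.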
443 
444 /// A region of an MBB for scheduling.
445 namespace {
446 struct SchedRegion {
447  /// RegionBegin is the first instruction in the scheduling region, and
448  /// RegionEnd is either MBB->end() or the scheduling boundary after the
449  /// last instruction in the scheduling region. These iterators cannot refer
450  /// to instructions outside of the identified scheduling region because
451  /// those may be reordered before scheduling this region.
452  MachineBasicBlock::iterator RegionBegin;
453  MachineBasicBlock::iterator RegionEnd;
454  unsigned NumRegionInstrs;
455 
456  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
457  unsigned N) :
458  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
459 };
460 } // end anonymous namespace
461 
462 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
463 
464 static void
465 getSchedRegions(MachineBasicBlock *MBB,
466  MBBRegionsVector &Regions,
467  bool RegionsTopDown) {
468  MachineFunction *MF = MBB->getParent();
469  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
470 
471  MachineBasicBlock::iterator I = nullptr;
472  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
473  RegionEnd != MBB->begin(); RegionEnd = I) {
474 
475  // Avoid decrementing RegionEnd for blocks with no terminator.
476  if (RegionEnd != MBB->end() ||
477  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
478  --RegionEnd;
479  }
480 
481  // The next region starts above the previous region. Look backward in the
482  // instruction stream until we find the nearest boundary.
483  unsigned NumRegionInstrs = 0;
484  I = RegionEnd;
485  for (;I != MBB->begin(); --I) {
486  MachineInstr &MI = *std::prev(I);
487  if (isSchedBoundary(&MI, &*MBB, MF, TII))
488  break;
489  if (!MI.isDebugInstr()) {
490  // MBB::size() uses instr_iterator to count. Here we need a bundle to
491  // count as a single instruction.
492  ++NumRegionInstrs;
493  }
494  }
495 
496  // It's possible we found a scheduling region that only has debug
497  // instructions. Don't bother scheduling these.
498  if (NumRegionInstrs != 0)
499  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
500  }
501 
502  if (RegionsTopDown)
503  std::reverse(Regions.begin(), Regions.end());
504 }
505 
506 /// Main driver for both MachineScheduler and PostMachineScheduler.
507 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
508  bool FixKillFlags) {
509  // Visit all machine basic blocks.
510  //
511  // TODO: Visit blocks in global postorder or postorder within the bottom-up
512  // loop tree. Then we can optionally compute global RegPressure.
513  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
514  MBB != MBBEnd; ++MBB) {
515 
516  Scheduler.startBlock(&*MBB);
517 
518 #ifndef NDEBUG
519  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
520  continue;
521  if (SchedOnlyBlock.getNumOccurrences()
522  && (int)SchedOnlyBlock != MBB->getNumber())
523  continue;
524 #endif
525 
526  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
527  // points to the scheduling boundary at the bottom of the region. The DAG
528  // does not include RegionEnd, but the region does (i.e. the next
529  // RegionEnd is above the previous RegionBegin). If the current block has
530  // no terminator then RegionEnd == MBB->end() for the bottom region.
531  //
532  // All the regions of MBB are first found and stored in MBBRegions, which
533  // are then processed top-down if the scheduler requests that region order.
534  //
535  // The Scheduler may insert instructions during either schedule() or
536  // exitRegion(), even for empty regions. So the local iterators 'I' and
537  // 'RegionEnd' are invalid across these calls. Instructions must not be
538  // added to other regions than the current one without updating MBBRegions.
539 
540  MBBRegionsVector MBBRegions;
541  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
542  for (MBBRegionsVector::iterator R = MBBRegions.begin();
543  R != MBBRegions.end(); ++R) {
544  MachineBasicBlock::iterator I = R->RegionBegin;
545  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
546  unsigned NumRegionInstrs = R->NumRegionInstrs;
547 
548  // Notify the scheduler of the region, even if we may skip scheduling
549  // it. Perhaps it still needs to be bundled.
550  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
551 
552  // Skip empty scheduling regions (0 or 1 schedulable instructions).
553  if (I == RegionEnd || I == std::prev(RegionEnd)) {
554  // Close the current region. Bundle the terminator if needed.
555  // This invalidates 'RegionEnd' and 'I'.
556  Scheduler.exitRegion();
557  continue;
558  }
559  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
560  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
561  << " " << MBB->getName() << "\n From: " << *I
562  << " To: ";
563  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
564  else dbgs() << "End";
565  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
566  if (DumpCriticalPathLength) {
567  errs() << MF->getName();
568  errs() << ":%bb. " << MBB->getNumber();
569  errs() << " " << MBB->getName() << " \n";
570  }
571 
572  // Schedule a region: possibly reorder instructions.
573  // This invalidates the original region iterators.
574  Scheduler.schedule();
575 
576  // Close the current region.
577  Scheduler.exitRegion();
578  }
579  Scheduler.finishBlock();
580  // FIXME: Ideally, no further passes should rely on kill flags. However,
581  // thumb2 size reduction is currently an exception, so the PostMIScheduler
582  // needs to do this.
583  if (FixKillFlags)
584  Scheduler.fixupKills(*MBB);
585  }
586  Scheduler.finalizeSchedule();
587 }
588 
589 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
590  // unimplemented
591 }
592 
593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
595  dbgs() << "Queue " << Name << ": ";
596  for (const SUnit *SU : Queue)
597  dbgs() << SU->NodeNum << " ";
598  dbgs() << "\n";
599 }
600 #endif
601 
602 //===----------------------------------------------------------------------===//
603 // ScheduleDAGMI - Basic machine instruction scheduling. This is
604 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
605 // virtual registers.
606 //===----------------------------------------------------------------------===//
607 
608 // Provide a vtable anchor.
609 ScheduleDAGMI::~ScheduleDAGMI() = default;
610 
611 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
612 /// NumPredsLeft reaches zero, release the successor node.
613 ///
614 /// FIXME: Adjust SuccSU height based on MinLatency.
615 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
616  SUnit *SuccSU = SuccEdge->getSUnit();
617 
618  if (SuccEdge->isWeak()) {
619  --SuccSU->WeakPredsLeft;
620  if (SuccEdge->isCluster())
621  NextClusterSucc = SuccSU;
622  return;
623  }
624 #ifndef NDEBUG
625  if (SuccSU->NumPredsLeft == 0) {
626  dbgs() << "*** Scheduling failed! ***\n";
627  dumpNode(*SuccSU);
628  dbgs() << " has been released too many times!\n";
629  llvm_unreachable(nullptr);
630  }
631 #endif
632  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
633  // CurrCycle may have advanced since then.
634  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
635  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
636 
637  --SuccSU->NumPredsLeft;
638  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
639  SchedImpl->releaseTopNode(SuccSU);
640 }
641 
642 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
643 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
644  for (SDep &Succ : SU->Succs)
645  releaseSucc(SU, &Succ);
646 }
647 
648 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
649 /// NumSuccsLeft reaches zero, release the predecessor node.
650 ///
651 /// FIXME: Adjust PredSU height based on MinLatency.
652 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
653  SUnit *PredSU = PredEdge->getSUnit();
654 
655  if (PredEdge->isWeak()) {
656  --PredSU->WeakSuccsLeft;
657  if (PredEdge->isCluster())
658  NextClusterPred = PredSU;
659  return;
660  }
661 #ifndef NDEBUG
662  if (PredSU->NumSuccsLeft == 0) {
663  dbgs() << "*** Scheduling failed! ***\n";
664  dumpNode(*PredSU);
665  dbgs() << " has been released too many times!\n";
666  llvm_unreachable(nullptr);
667  }
668 #endif
669  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
670  // CurrCycle may have advanced since then.
671  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
672  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
673 
674  --PredSU->NumSuccsLeft;
675  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
676  SchedImpl->releaseBottomNode(PredSU);
677 }
678 
679 /// releasePredecessors - Call releasePred on each of SU's predecessors.
680 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
681  for (SDep &Pred : SU->Preds)
682  releasePred(SU, &Pred);
683 }
684 
685 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
686  ScheduleDAGInstrs::startBlock(bb);
687  SchedImpl->enterMBB(bb);
688 }
689 
690 void ScheduleDAGMI::finishBlock() {
691  SchedImpl->leaveMBB();
692  ScheduleDAGInstrs::finishBlock();
693 }
694 
695 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
696 /// crossing a scheduling boundary. [begin, end) includes all instructions in
697 /// the region, including the boundary itself and single-instruction regions
698 /// that don't get scheduled.
699 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
700  MachineBasicBlock::iterator begin,
701  MachineBasicBlock::iterator end,
702  unsigned regioninstrs)
703 {
704  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
705 
706  SchedImpl->initPolicy(begin, end, regioninstrs);
707 }
708 
709 /// This is normally called from the main scheduler loop but may also be invoked
710 /// by the scheduling strategy to perform additional code motion.
711 void ScheduleDAGMI::moveInstruction(
712  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
713  // Advance RegionBegin if the first instruction moves down.
714  if (&*RegionBegin == MI)
715  ++RegionBegin;
716 
717  // Update the instruction stream.
718  BB->splice(InsertPos, BB, MI);
719 
720  // Update LiveIntervals
721  if (LIS)
722  LIS->handleMove(*MI, /*UpdateFlags=*/true);
723 
724  // Recede RegionBegin if an instruction moves above the first.
725  if (RegionBegin == InsertPos)
726  RegionBegin = MI;
727 }
728 
729 bool ScheduleDAGMI::checkSchedLimit() {
730 #ifndef NDEBUG
731  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
732  CurrentTop = CurrentBottom;
733  return false;
734  }
735  ++NumInstrsScheduled;
736 #endif
737  return true;
738 }
739 
740 /// Per-region scheduling driver, called back from
741 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
742 /// does not consider liveness or register pressure. It is useful for PostRA
743 /// scheduling and potentially other custom schedulers.
744 void ScheduleDAGMI::schedule() {
745  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
746  LLVM_DEBUG(SchedImpl->dumpPolicy());
747 
748  // Build the DAG.
749  buildSchedGraph(AA);
750 
751  postprocessDAG();
752 
753  SmallVector<SUnit*, 8> TopRoots, BotRoots;
754  findRootsAndBiasEdges(TopRoots, BotRoots);
755 
756  LLVM_DEBUG(dump());
757  if (PrintDAGs) dump();
758  if (ViewMISchedDAGs) viewGraph();
759 
760  // Initialize the strategy before modifying the DAG.
761  // This may initialize a DFSResult to be used for queue priority.
762  SchedImpl->initialize(this);
763 
764  // Initialize ready queues now that the DAG and priority data are finalized.
765  initQueues(TopRoots, BotRoots);
766 
767  bool IsTopNode = false;
768  while (true) {
769  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
770  SUnit *SU = SchedImpl->pickNode(IsTopNode);
771  if (!SU) break;
772 
773  assert(!SU->isScheduled && "Node already scheduled");
774  if (!checkSchedLimit())
775  break;
776 
777  MachineInstr *MI = SU->getInstr();
778  if (IsTopNode) {
779  assert(SU->isTopReady() && "node still has unscheduled dependencies");
780  if (&*CurrentTop == MI)
781  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
782  else
783  moveInstruction(MI, CurrentTop);
784  } else {
785  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
786  MachineBasicBlock::iterator priorII =
787  priorNonDebug(CurrentBottom, CurrentTop);
788  if (&*priorII == MI)
789  CurrentBottom = priorII;
790  else {
791  if (&*CurrentTop == MI)
792  CurrentTop = nextIfDebug(++CurrentTop, priorII);
793  moveInstruction(MI, CurrentBottom);
794  CurrentBottom = MI;
795  }
796  }
797  // Notify the scheduling strategy before updating the DAG.
798  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
799  // runs, it can then use the accurate ReadyCycle time to determine whether
800  // newly released nodes can move to the readyQ.
801  SchedImpl->schedNode(SU, IsTopNode);
802 
803  updateQueues(SU, IsTopNode);
804  }
805  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
806 
807  placeDebugValues();
808 
809  LLVM_DEBUG({
810  dbgs() << "*** Final schedule for "
811  << printMBBReference(*begin()->getParent()) << " ***\n";
812  dumpSchedule();
813  dbgs() << '\n';
814  });
815 }
816 
817 /// Apply each ScheduleDAGMutation step in order.
818 void ScheduleDAGMI::postprocessDAG() {
819  for (auto &m : Mutations)
820  m->apply(this);
821 }
822 
823 void ScheduleDAGMI::
824 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
825  SmallVectorImpl<SUnit*> &BotRoots) {
826  for (SUnit &SU : SUnits) {
827  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
828 
829  // Order predecessors so DFSResult follows the critical path.
830  SU.biasCriticalPath();
831 
832  // A SUnit is ready to top schedule if it has no predecessors.
833  if (!SU.NumPredsLeft)
834  TopRoots.push_back(&SU);
835  // A SUnit is ready to bottom schedule if it has no successors.
836  if (!SU.NumSuccsLeft)
837  BotRoots.push_back(&SU);
838  }
839  ExitSU.biasCriticalPath();
840 }
841 
842 /// Identify DAG roots and setup scheduler queues.
843 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
844  ArrayRef<SUnit*> BotRoots) {
845  NextClusterSucc = nullptr;
846  NextClusterPred = nullptr;
847 
848  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
849  //
850  // Nodes with unreleased weak edges can still be roots.
851  // Release top roots in forward order.
852  for (SUnit *SU : TopRoots)
853  SchedImpl->releaseTopNode(SU);
854 
855  // Release bottom roots in reverse order so the higher priority nodes appear
856  // first. This is more natural and slightly more efficient.
857  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
858  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
859  SchedImpl->releaseBottomNode(*I);
860  }
861 
862  releaseSuccessors(&EntrySU);
863  releasePredecessors(&ExitSU);
864 
865  SchedImpl->registerRoots();
866 
867  // Advance past initial DebugValues.
868  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
869  CurrentBottom = RegionEnd;
870 }
871 
872 /// Update scheduler queues after scheduling an instruction.
873 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
874  // Release dependent instructions for scheduling.
875  if (IsTopNode)
876  releaseSuccessors(SU);
877  else
878  releasePredecessors(SU);
879 
880  SU->isScheduled = true;
881 }
882 
883 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
884 void ScheduleDAGMI::placeDebugValues() {
885  // If first instruction was a DBG_VALUE then put it back.
886  if (FirstDbgValue) {
887  BB->splice(RegionBegin, BB, FirstDbgValue);
888  RegionBegin = FirstDbgValue;
889  }
890 
891  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
892  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
893  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
894  MachineInstr *DbgValue = P.first;
895  MachineBasicBlock::iterator OrigPrevMI = P.second;
896  if (&*RegionBegin == DbgValue)
897  ++RegionBegin;
898  BB->splice(++OrigPrevMI, BB, DbgValue);
899  if (OrigPrevMI == std::prev(RegionEnd))
900  RegionEnd = DbgValue;
901  }
902  DbgValues.clear();
903  FirstDbgValue = nullptr;
904 }
905 
906 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
907 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
908  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
909  if (SUnit *SU = getSUnit(&(*MI)))
910  dumpNode(*SU);
911  else
912  dbgs() << "Missing SUnit\n";
913  }
914 }
915 #endif
916 
917 //===----------------------------------------------------------------------===//
918 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
919 // preservation.
920 //===----------------------------------------------------------------------===//
921 
922 ScheduleDAGMILive::~ScheduleDAGMILive() {
923  delete DFSResult;
924 }
925 
926 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
927  const MachineInstr &MI = *SU.getInstr();
928  for (const MachineOperand &MO : MI.operands()) {
929  if (!MO.isReg())
930  continue;
931  if (!MO.readsReg())
932  continue;
933  if (TrackLaneMasks && !MO.isUse())
934  continue;
935 
936  Register Reg = MO.getReg();
937  if (!Register::isVirtualRegister(Reg))
938  continue;
939 
940  // Ignore re-defs.
941  if (TrackLaneMasks) {
942  bool FoundDef = false;
943  for (const MachineOperand &MO2 : MI.operands()) {
944  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
945  FoundDef = true;
946  break;
947  }
948  }
949  if (FoundDef)
950  continue;
951  }
952 
953  // Record this local VReg use.
954  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
955  for (; UI != VRegUses.end(); ++UI) {
956  if (UI->SU == &SU)
957  break;
958  }
959  if (UI == VRegUses.end())
960  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
961  }
962 }
963 
964 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
965 /// crossing a scheduling boundary. [begin, end) includes all instructions in
966 /// the region, including the boundary itself and single-instruction regions
967 /// that don't get scheduled.
968 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
969  MachineBasicBlock::iterator begin,
970  MachineBasicBlock::iterator end,
971  unsigned regioninstrs)
972 {
973  // ScheduleDAGMI initializes SchedImpl's per-region policy.
974  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
975 
976  // For convenience remember the end of the liveness region.
977  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
978 
979  SUPressureDiffs.clear();
980 
981  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
982  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
983 
984  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
985  "ShouldTrackLaneMasks requires ShouldTrackPressure");
986 }
987 
988 // Setup the register pressure trackers for the top scheduled and bottom
989 // scheduled regions.
990 void ScheduleDAGMILive::initRegPressure() {
991  VRegUses.clear();
992  VRegUses.setUniverse(MRI.getNumVirtRegs());
993  for (SUnit &SU : SUnits)
994  collectVRegUses(SU);
995 
996  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
997  ShouldTrackLaneMasks, false);
998  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
999  ShouldTrackLaneMasks, false);
1000 
1001  // Close the RPTracker to finalize live ins.
1002  RPTracker.closeRegion();
1003 
1004  LLVM_DEBUG(RPTracker.dump());
1005 
1006  // Initialize the live ins and live outs.
1007  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1008  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1009 
1010  // Close one end of the tracker so we can call
1011  // getMaxUpward/DownwardPressureDelta before advancing across any
1012  // instructions. This converts currently live regs into live ins/outs.
1013  TopRPTracker.closeTop();
1014  BotRPTracker.closeBottom();
1015 
1016  BotRPTracker.initLiveThru(RPTracker);
1017  if (!BotRPTracker.getLiveThru().empty()) {
1018  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1019  LLVM_DEBUG(dbgs() << "Live Thru: ";
1020  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1021  };
1022 
1023  // For each live out vreg reduce the pressure change associated with other
1024  // uses of the same vreg below the live-out reaching def.
1025  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1026 
1027  // Account for liveness generated by the region boundary.
1028  if (LiveRegionEnd != RegionEnd) {
1029  SmallVector<RegisterMaskPair, 8> LiveUses;
1030  BotRPTracker.recede(&LiveUses);
1031  updatePressureDiffs(LiveUses);
1032  }
1033 
1034  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1035  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1036  dbgs() << "Bottom Pressure:\n";
1037  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1038 
1039  assert((BotRPTracker.getPos() == RegionEnd ||
1040  (RegionEnd->isDebugInstr() &&
1041  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1042  "Can't find the region bottom");
1043 
1044  // Cache the list of excess pressure sets in this region. This will also track
1045  // the max pressure in the scheduled code for these sets.
1046  RegionCriticalPSets.clear();
1047  const std::vector<unsigned> &RegionPressure =
1048  RPTracker.getPressure().MaxSetPressure;
1049  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1050  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1051  if (RegionPressure[i] > Limit) {
1052  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1053  << " Actual " << RegionPressure[i] << "\n");
1054  RegionCriticalPSets.push_back(PressureChange(i));
1055  }
1056  }
1057  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1058  for (const PressureChange &RCPS
1059  : RegionCriticalPSets) dbgs()
1060  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1061  dbgs() << "\n");
1062 }
1063 
1064 void ScheduleDAGMILive::
1065 updateScheduledPressure(const SUnit *SU,
1066  const std::vector<unsigned> &NewMaxPressure) {
1067  const PressureDiff &PDiff = getPressureDiff(SU);
1068  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1069  for (const PressureChange &PC : PDiff) {
1070  if (!PC.isValid())
1071  break;
1072  unsigned ID = PC.getPSet();
1073  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1074  ++CritIdx;
1075  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1076  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1077  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1078  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1079  }
1080  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1081  if (NewMaxPressure[ID] >= Limit - 2) {
1082  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1083  << NewMaxPressure[ID]
1084  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1085  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1086  << " livethru)\n");
1087  }
1088  }
1089 }
1090 
1091 /// Update the PressureDiff array for liveness after scheduling this
1092 /// instruction.
1093 void ScheduleDAGMILive::updatePressureDiffs(
1094  ArrayRef<RegisterMaskPair> LiveUses) {
1095  for (const RegisterMaskPair &P : LiveUses) {
1096  unsigned Reg = P.RegUnit;
1097  /// FIXME: Currently assuming single-use physregs.
1098  if (!Register::isVirtualRegister(Reg))
1099  continue;
1100 
1101  if (ShouldTrackLaneMasks) {
1102  // If the register has just become live then other uses won't change
1103  // this fact anymore => decrement pressure.
1104  // If the register has just become dead then other uses make it come
1105  // back to life => increment pressure.
1106  bool Decrement = P.LaneMask.any();
1107 
1108  for (const VReg2SUnit &V2SU
1109  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1110  SUnit &SU = *V2SU.SU;
1111  if (SU.isScheduled || &SU == &ExitSU)
1112  continue;
1113 
1114  PressureDiff &PDiff = getPressureDiff(&SU);
1115  PDiff.addPressureChange(Reg, Decrement, &MRI);
1116  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1117  << printReg(Reg, TRI) << ':'
1118  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1119  dbgs() << " to "; PDiff.dump(*TRI););
1120  }
1121  } else {
1122  assert(P.LaneMask.any());
1123  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1124  // This may be called before CurrentBottom has been initialized. However,
1125  // BotRPTracker must have a valid position. We want the value live into the
1126  // instruction or live out of the block, so ask for the previous
1127  // instruction's live-out.
1128  const LiveInterval &LI = LIS->getInterval(Reg);
1129  VNInfo *VNI;
1130  MachineBasicBlock::const_iterator I =
1131  nextIfDebug(BotRPTracker.getPos(), BB->end());
1132  if (I == BB->end())
1133  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1134  else {
1135  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1136  VNI = LRQ.valueIn();
1137  }
1138  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1139  assert(VNI && "No live value at use.");
1140  for (const VReg2SUnit &V2SU
1141  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1142  SUnit *SU = V2SU.SU;
1143  // If this use comes before the reaching def, it cannot be a last use,
1144  // so decrease its pressure change.
1145  if (!SU->isScheduled && SU != &ExitSU) {
1146  LiveQueryResult LRQ =
1147  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1148  if (LRQ.valueIn() == VNI) {
1149  PressureDiff &PDiff = getPressureDiff(SU);
1150  PDiff.addPressureChange(Reg, true, &MRI);
1151  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1152  << *SU->getInstr();
1153  dbgs() << " to "; PDiff.dump(*TRI););
1154  }
1155  }
1156  }
1157  }
1158  }
1159 }
1160 
1161 void ScheduleDAGMILive::dump() const {
1162 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1163  if (EntrySU.getInstr() != nullptr)
1164  dumpNodeAll(EntrySU);
1165  for (const SUnit &SU : SUnits) {
1166  dumpNodeAll(SU);
1167  if (ShouldTrackPressure) {
1168  dbgs() << " Pressure Diff : ";
1169  getPressureDiff(&SU).dump(*TRI);
1170  }
1171  dbgs() << " Single Issue : ";
1172  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1173  SchedModel.mustEndGroup(SU.getInstr()))
1174  dbgs() << "true;";
1175  else
1176  dbgs() << "false;";
1177  dbgs() << '\n';
1178  }
1179  if (ExitSU.getInstr() != nullptr)
1180  dumpNodeAll(ExitSU);
1181 #endif
1182 }
1183 
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196  LLVM_DEBUG(SchedImpl->dumpPolicy());
1197  buildDAGWithRegPressure();
1198 
1199  postprocessDAG();
1200 
1201  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202  findRootsAndBiasEdges(TopRoots, BotRoots);
1203 
1204  // Initialize the strategy before modifying the DAG.
1205  // This may initialize a DFSResult to be used for queue priority.
1206  SchedImpl->initialize(this);
1207 
1208  LLVM_DEBUG(dump());
1209  if (PrintDAGs) dump();
1210  if (ViewMISchedDAGs) viewGraph();
1211 
1212  // Initialize ready queues now that the DAG and priority data are finalized.
1213  initQueues(TopRoots, BotRoots);
1214 
1215  bool IsTopNode = false;
1216  while (true) {
1217  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1218  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1219  if (!SU) break;
1220 
1221  assert(!SU->isScheduled && "Node already scheduled");
1222  if (!checkSchedLimit())
1223  break;
1224 
1225  scheduleMI(SU, IsTopNode);
1226 
1227  if (DFSResult) {
1228  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1229  if (!ScheduledTrees.test(SubtreeID)) {
1230  ScheduledTrees.set(SubtreeID);
1231  DFSResult->scheduleTree(SubtreeID);
1232  SchedImpl->scheduleTree(SubtreeID);
1233  }
1234  }
1235 
1236  // Notify the scheduling strategy after updating the DAG.
1237  SchedImpl->schedNode(SU, IsTopNode);
1238 
1239  updateQueues(SU, IsTopNode);
1240  }
1241  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1242 
1243  placeDebugValues();
1244 
1245  LLVM_DEBUG({
1246  dbgs() << "*** Final schedule for "
1247  << printMBBReference(*begin()->getParent()) << " ***\n";
1248  dumpSchedule();
1249  dbgs() << '\n';
1250  });
1251 }
1252 
1253 /// Build the DAG and setup three register pressure trackers.
1254 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1255  if (!ShouldTrackPressure) {
1256  RPTracker.reset();
1257  RegionCriticalPSets.clear();
1258  buildSchedGraph(AA);
1259  return;
1260  }
1261 
1262  // Initialize the register pressure tracker used by buildSchedGraph.
1263  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1264  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1265 
1266  // Account for liveness generated by the region boundary.
1267  if (LiveRegionEnd != RegionEnd)
1268  RPTracker.recede();
1269 
1270  // Build the DAG, and compute current register pressure.
1271  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1272 
1273  // Initialize top/bottom trackers after computing region pressure.
1274  initRegPressure();
1275 }
1276 
1277 void ScheduleDAGMILive::computeDFSResult() {
1278  if (!DFSResult)
1279  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1280  DFSResult->clear();
1281  ScheduledTrees.clear();
1282  DFSResult->resize(SUnits.size());
1283  DFSResult->compute(SUnits);
1284  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1285 }
1286 
1287 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1288 /// only provides the critical path for single block loops. To handle loops that
1289 /// span blocks, we could use the vreg path latencies provided by
1290 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1291 /// available for use in the scheduler.
1292 ///
1293 /// The cyclic path estimation identifies a def-use pair that crosses the back
1294 /// edge and considers the depth and height of the nodes. For example, consider
1295 /// the following instruction sequence where each instruction has unit latency
1296 /// and defines an eponymous virtual register:
1297 ///
1298 /// a->b(a,c)->c(b)->d(c)->exit
1299 ///
1300 /// The cyclic critical path is two cycles: b->c->b
1301 /// The acyclic critical path is four cycles: a->b->c->d->exit
1302 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1303 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1304 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1305 /// LiveInDepth = depth(b) = len(a->b) = 1
1306 ///
1307 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1308 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1309 /// CyclicCriticalPath = min(2, 2) = 2
1310 ///
1311 /// This could be relevant to PostRA scheduling, but is currently implemented
1312 /// assuming LiveIntervals.
1313 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1314  // This only applies to single-block loops.
1315  if (!BB->isSuccessor(BB))
1316  return 0;
1317 
1318  unsigned MaxCyclicLatency = 0;
1319  // Visit each live out vreg def to find def/use pairs that cross iterations.
1320  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1321  unsigned Reg = P.RegUnit;
1322  if (!Register::isVirtualRegister(Reg))
1323  continue;
1324  const LiveInterval &LI = LIS->getInterval(Reg);
1325  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1326  if (!DefVNI)
1327  continue;
1328 
1329  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1330  const SUnit *DefSU = getSUnit(DefMI);
1331  if (!DefSU)
1332  continue;
1333 
1334  unsigned LiveOutHeight = DefSU->getHeight();
1335  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1336  // Visit all local users of the vreg def.
1337  for (const VReg2SUnit &V2SU
1338  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1339  SUnit *SU = V2SU.SU;
1340  if (SU == &ExitSU)
1341  continue;
1342 
1343  // Only consider uses of the phi.
1344  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1345  if (!LRQ.valueIn()->isPHIDef())
1346  continue;
1347 
1348  // Assume that a path spanning two iterations is a cycle, which could
1349  // overestimate in strange cases. This allows cyclic latency to be
1350  // estimated as the minimum slack of the vreg's depth or height.
1351  unsigned CyclicLatency = 0;
1352  if (LiveOutDepth > SU->getDepth())
1353  CyclicLatency = LiveOutDepth - SU->getDepth();
1354 
1355  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1356  if (LiveInHeight > LiveOutHeight) {
1357  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1358  CyclicLatency = LiveInHeight - LiveOutHeight;
1359  } else
1360  CyclicLatency = 0;
1361 
1362  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1363  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1364  if (CyclicLatency > MaxCyclicLatency)
1365  MaxCyclicLatency = CyclicLatency;
1366  }
1367  }
1368  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1369  return MaxCyclicLatency;
1370 }
1371 
1372 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1373 /// the Top RP tracker in case the region beginning has changed.
1374 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1375  ArrayRef<SUnit*> BotRoots) {
1376  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1377  if (ShouldTrackPressure) {
1378  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1379  TopRPTracker.setPos(CurrentTop);
1380  }
1381 }
1382 
1383 /// Move an instruction and update register pressure.
1384 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1385  // Move the instruction to its new location in the instruction stream.
1386  MachineInstr *MI = SU->getInstr();
1387 
1388  if (IsTopNode) {
1389  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1390  if (&*CurrentTop == MI)
1391  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1392  else {
1393  moveInstruction(MI, CurrentTop);
1394  TopRPTracker.setPos(MI);
1395  }
1396 
1397  if (ShouldTrackPressure) {
1398  // Update top scheduled pressure.
1399  RegisterOperands RegOpers;
1400  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1401  if (ShouldTrackLaneMasks) {
1402  // Adjust liveness and add missing dead+read-undef flags.
1403  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1404  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1405  } else {
1406  // Adjust for missing dead-def flags.
1407  RegOpers.detectDeadDefs(*MI, *LIS);
1408  }
1409 
1410  TopRPTracker.advance(RegOpers);
1411  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1412  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1413  TopRPTracker.getRegSetPressureAtPos(), TRI););
1414 
1415  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1416  }
1417  } else {
1418  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1419  MachineBasicBlock::iterator priorII =
1420  priorNonDebug(CurrentBottom, CurrentTop);
1421  if (&*priorII == MI)
1422  CurrentBottom = priorII;
1423  else {
1424  if (&*CurrentTop == MI) {
1425  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1426  TopRPTracker.setPos(CurrentTop);
1427  }
1428  moveInstruction(MI, CurrentBottom);
1429  CurrentBottom = MI;
1430  BotRPTracker.setPos(CurrentBottom);
1431  }
1432  if (ShouldTrackPressure) {
1433  RegisterOperands RegOpers;
1434  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1435  if (ShouldTrackLaneMasks) {
1436  // Adjust liveness and add missing dead+read-undef flags.
1437  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1438  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1439  } else {
1440  // Adjust for missing dead-def flags.
1441  RegOpers.detectDeadDefs(*MI, *LIS);
1442  }
1443 
1444  if (BotRPTracker.getPos() != CurrentBottom)
1445  BotRPTracker.recedeSkipDebugValues();
1446  SmallVector<RegisterMaskPair, 8> LiveUses;
1447  BotRPTracker.recede(RegOpers, &LiveUses);
1448  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1449  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1450  BotRPTracker.getRegSetPressureAtPos(), TRI););
1451 
1452  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1453  updatePressureDiffs(LiveUses);
1454  }
1455  }
1456 }
1457 
1458 //===----------------------------------------------------------------------===//
1459 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1460 //===----------------------------------------------------------------------===//
1461 
1462 namespace {
1463 
1464 /// Post-process the DAG to create cluster edges between neighboring
1465 /// loads or between neighboring stores.
1466 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1467  struct MemOpInfo {
1468  SUnit *SU;
1469  const MachineOperand *BaseOp;
1470  int64_t Offset;
1471 
1472  MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1473  : SU(su), BaseOp(Op), Offset(ofs) {}
1474 
1475  bool operator<(const MemOpInfo &RHS) const {
1476  if (BaseOp->getType() != RHS.BaseOp->getType())
1477  return BaseOp->getType() < RHS.BaseOp->getType();
1478 
1479  if (BaseOp->isReg())
1480  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1481  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1482  RHS.SU->NodeNum);
1483  if (BaseOp->isFI()) {
1484  const MachineFunction &MF =
1485  *BaseOp->getParent()->getParent()->getParent();
1486  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1487  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1488  TargetFrameLowering::StackGrowsDown;
1489  // Can't use tuple comparison here since we might need to use a
1490  // different order when the stack grows down.
1491  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1492  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1493  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1494 
1495  if (Offset != RHS.Offset)
1496  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1497 
1498  return SU->NodeNum < RHS.SU->NodeNum;
1499  }
1500 
1501  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1502  "index bases.");
1503  }
1504  };
1505 
1506  const TargetInstrInfo *TII;
1507  const TargetRegisterInfo *TRI;
1508  bool IsLoad;
1509 
1510 public:
1511  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1512  const TargetRegisterInfo *tri, bool IsLoad)
1513  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1514 
1515  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1516 
1517 protected:
1518  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1519 };
1520 
1521 class StoreClusterMutation : public BaseMemOpClusterMutation {
1522 public:
1523  StoreClusterMutation(const TargetInstrInfo *tii,
1524  const TargetRegisterInfo *tri)
1525  : BaseMemOpClusterMutation(tii, tri, false) {}
1526 };
1527 
1528 class LoadClusterMutation : public BaseMemOpClusterMutation {
1529 public:
1530  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1531  : BaseMemOpClusterMutation(tii, tri, true) {}
1532 };
1533 
1534 } // end anonymous namespace
1535 
1536 namespace llvm {
1537 
1538 std::unique_ptr<ScheduleDAGMutation>
1539 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1540  const TargetRegisterInfo *TRI) {
1541  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1542  : nullptr;
1543 }
1544 
1545 std::unique_ptr<ScheduleDAGMutation>
1546 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1547  const TargetRegisterInfo *TRI) {
1548  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1549  : nullptr;
1550 }
1551 
1552 } // end namespace llvm
1553 
1554 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1555  ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1556  SmallVector<MemOpInfo, 32> MemOpRecords;
1557  for (SUnit *SU : MemOps) {
1558  const MachineOperand *BaseOp;
1559  int64_t Offset;
1560  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1561  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1562  }
1563  if (MemOpRecords.size() < 2)
1564  return;
1565 
1566  llvm::sort(MemOpRecords);
1567  unsigned ClusterLength = 1;
1568  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1569  SUnit *SUa = MemOpRecords[Idx].SU;
1570  SUnit *SUb = MemOpRecords[Idx+1].SU;
1571  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1572  *MemOpRecords[Idx + 1].BaseOp,
1573  ClusterLength) &&
1574  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1575  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1576  << SUb->NodeNum << ")\n");
1577  // Copy successor edges from SUa to SUb. Interleaving computation
1578  // dependent on SUa can prevent load combining due to register reuse.
1579  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1580  // loads should have effectively the same inputs.
1581  for (const SDep &Succ : SUa->Succs) {
1582  if (Succ.getSUnit() == SUb)
1583  continue;
1584  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1585  << ")\n");
1586  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1587  }
1588  ++ClusterLength;
1589  } else
1590  ClusterLength = 1;
1591  }
1592 }
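// Added example (not part of the original source): given two loads off the
// same base register, e.g.
//
//   SU(a): %x = LOAD %base, 0
//   SU(b): %y = LOAD %base, 8
//
// the records sort by (base, offset), shouldClusterMemOps() decides whether
// the pair is worth keeping together, and on success a Cluster edge
// SU(a)->SU(b) is added while SU(a)'s other successors are made to depend on
// SU(b) as well, so unrelated work is not interleaved between the pair.
// Opcode and operand spellings are illustrative only.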
1593 
1594 /// Callback from DAG postProcessing to create cluster edges for loads.
1595 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1596  // Map DAG NodeNum to store chain ID.
1597  DenseMap<unsigned, unsigned> StoreChainIDs;
1598  // Map each store chain to a set of dependent MemOps.
1599  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1600  for (SUnit &SU : DAG->SUnits) {
1601  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1602  (!IsLoad && !SU.getInstr()->mayStore()))
1603  continue;
1604 
1605  unsigned ChainPredID = DAG->SUnits.size();
1606  for (const SDep &Pred : SU.Preds) {
1607  if (Pred.isCtrl()) {
1608  ChainPredID = Pred.getSUnit()->NodeNum;
1609  break;
1610  }
1611  }
1612  // Check if this chain-like pred has been seen
1613  // before. ChainPredID==MaxNodeID at the top of the schedule.
1614  unsigned NumChains = StoreChainDependents.size();
1615  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1616  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1617  if (Result.second)
1618  StoreChainDependents.resize(NumChains + 1);
1619  StoreChainDependents[Result.first->second].push_back(&SU);
1620  }
1621 
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChainDependents)
1624  clusterNeighboringMemOps(SCD, DAG);
1625 }
1626 
1627 //===----------------------------------------------------------------------===//
1628 // CopyConstrain - DAG post-processing to encourage copy elimination.
1629 //===----------------------------------------------------------------------===//
1630 
1631 namespace {
1632 
1633 /// Post-process the DAG to create weak edges from all uses of a copy to
1634 /// the one use that defines the copy's source vreg, most likely an induction
1635 /// variable increment.
1636 class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639 
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643 
1644 public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646 
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648 
1649 protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651 };
1652 
1653 } // end anonymous namespace
1654 
1655 namespace llvm {
1656 
1657 std::unique_ptr<ScheduleDAGMutation>
1658 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659  const TargetRegisterInfo *TRI) {
1660  return std::make_unique<CopyConstrain>(TII, TRI);
1661 }
1662 
1663 } // end namespace llvm
1664 
1665 /// constrainLocalCopy handles two possibilities:
1666 /// 1) Local src:
1667 /// I0: = dst
1668 /// I1: src = ...
1669 /// I2: = dst
1670 /// I3: dst = src (copy)
1671 /// (create pred->succ edges I0->I1, I2->I1)
1672 ///
1673 /// 2) Local copy:
1674 /// I0: dst = src (copy)
1675 /// I1: = dst
1676 /// I2: src = ...
1677 /// I3: = dst
1678 /// (create pred->succ edges I1->I2, I3->I2)
1679 ///
1680 /// Although the MachineScheduler is currently constrained to single blocks,
1681 /// this algorithm should handle extended blocks. An EBB is a set of
1682 /// contiguously numbered blocks such that the previous block in the EBB is
1683 /// always the single predecessor.
1684 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687 
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  Register SrcReg = SrcOp.getReg();
1691  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692  return;
1693 
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  Register DstReg = DstOp.getReg();
1696  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1697  return;
1698 
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709  LocalReg = DstReg;
1710  GlobalReg = SrcReg;
1711  LocalLI = &LIS->getInterval(LocalReg);
1712  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713  return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716 
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with it.
1723  if (GlobalSegment == GlobalLI->end())
1724  return;
1725 
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731  ++GlobalSegment;
1732 
1733  if (GlobalSegment == GlobalLI->end())
1734  return;
1735 
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738  // Two address defs have no hole.
1739  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740  GlobalSegment->start)) {
1741  return;
1742  }
1743  // If the prior global segment may be defined by the same two-address
1744  // instruction that also defines LocalLI, then we cannot make a hole here.
1745  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746  LocalLI->beginIndex())) {
1747  return;
1748  }
1749  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750  // it would be a disconnected component in the live range.
1751  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752  "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756  return;
1757 
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760  return;
1761 
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770  continue;
1771  if (Succ.getSUnit() == GlobalSU)
1772  continue;
1773  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774  return;
1775  LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785  continue;
1786  if (Pred.getSUnit() == FirstLocalSU)
1787  continue;
1788  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789  return;
1790  GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1797  << GlobalSU->NodeNum << ")\n");
1798  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1803  << FirstLocalSU->NodeNum << ")\n");
1804  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806 }
1807 
1808 /// Callback from DAG postProcessing to create weak edges to encourage
1809 /// copy elimination.
1810 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813 
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816  return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819  *priorNonDebug(DAG->end(), DAG->begin()));
1820 
1821  for (SUnit &SU : DAG->SUnits) {
1822  if (!SU.getInstr()->isCopy())
1823  continue;
1824 
1825  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831 // and possibly other custom schedulers.
1832 //===----------------------------------------------------------------------===//
1833 
1834 static const unsigned InvalidCycle = ~0U;
1835 
1836 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837 
1838 /// Given a Count of resource usage and a Latency value, return true if a
1839 /// SchedBoundary becomes resource limited.
1840 /// If we are checking after scheduling a node, we should return true when
1841 /// we just reach the resource limit.
1842 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1843  unsigned Latency, bool AfterSchedNode) {
1844  int ResCntFactor = (int)(Count - (Latency * LFactor));
1845  if (AfterSchedNode)
1846  return ResCntFactor >= (int)LFactor;
1847  else
1848  return ResCntFactor > (int)LFactor;
1849 }
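// Illustrative sketch (hypothetical numbers, not part of the original source):
// with LFactor = 2, Count = 7 executed resource units and Latency = 3 cycles,
// ResCntFactor = 7 - (3 * 2) = 1 < 2, so the boundary is still latency
// limited; once Count reaches 9, ResCntFactor = 3 exceeds LFactor and the
// boundary is reported as resource limited in both the pre- and
// post-schedule checks.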
1850 
1851 void SchedBoundary::reset() {
1852  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1853  // Destroying and reconstructing it is very expensive though. So keep
1854  // invalid, placeholder HazardRecs.
1855  if (HazardRec && HazardRec->isEnabled()) {
1856  delete HazardRec;
1857  HazardRec = nullptr;
1858  }
1859  Available.clear();
1860  Pending.clear();
1861  CheckPending = false;
1862  CurrCycle = 0;
1863  CurrMOps = 0;
1864  MinReadyCycle = std::numeric_limits<unsigned>::max();
1865  ExpectedLatency = 0;
1866  DependentLatency = 0;
1867  RetiredMOps = 0;
1868  MaxExecutedResCount = 0;
1869  ZoneCritResIdx = 0;
1870  IsResourceLimited = false;
1871  ReservedCycles.clear();
1872  ReservedCyclesIndex.clear();
1873 #ifndef NDEBUG
1874  // Track the maximum number of stall cycles that could arise either from the
1875  // latency of a DAG edge or the number of cycles that a processor resource is
1876  // reserved (SchedBoundary::ReservedCycles).
1877  MaxObservedStall = 0;
1878 #endif
1879  // Reserve a zero-count for invalid CritResIdx.
1880  ExecutedResCounts.resize(1);
1881  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1882 }
1883 
1884 void SchedRemainder::
1885 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1886  reset();
1887  if (!SchedModel->hasInstrSchedModel())
1888  return;
1889  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1890  for (SUnit &SU : DAG->SUnits) {
1891  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1892  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1893  * SchedModel->getMicroOpFactor();
1894  for (TargetSchedModel::ProcResIter
1895  PI = SchedModel->getWriteProcResBegin(SC),
1896  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1897  unsigned PIdx = PI->ProcResourceIdx;
1898  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1899  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1900  }
1901  }
1902 }
1903 
1904 void SchedBoundary::
1905 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1906  reset();
1907  DAG = dag;
1908  SchedModel = smodel;
1909  Rem = rem;
1910  if (SchedModel->hasInstrSchedModel()) {
1911  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1912  ReservedCyclesIndex.resize(ResourceCount);
1913  ExecutedResCounts.resize(ResourceCount);
1914  unsigned NumUnits = 0;
1915 
1916  for (unsigned i = 0; i < ResourceCount; ++i) {
1917  ReservedCyclesIndex[i] = NumUnits;
1918  NumUnits += SchedModel->getProcResource(i)->NumUnits;
1919  }
1920 
1921  ReservedCycles.resize(NumUnits, InvalidCycle);
1922  }
1923 }
1924 
1925 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1926 /// these "soft stalls" differently than the hard stall cycles based on CPU
1927 /// resources and computed by checkHazard(). A fully in-order model
1928 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1929 /// available for scheduling until they are ready. However, a weaker in-order
1930 /// model may use this for heuristics. For example, if a processor has in-order
1931 /// behavior when reading certain resources, this may come into play.
1932 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1933  if (!SU->isUnbuffered)
1934  return 0;
1935 
1936  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1937  if (ReadyCycle > CurrCycle)
1938  return ReadyCycle - CurrCycle;
1939  return 0;
1940 }
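// Illustrative sketch (hypothetical numbers): an unbuffered SU that becomes
// ready at cycle 7 while the zone sits at cycle 5 reports 2 soft-stall cycles;
// instructions that do not use an unbuffered resource always report 0.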
1941 
1942 /// Compute the next cycle at which the given processor resource unit
1943 /// can be scheduled.
1944 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1945  unsigned Cycles) {
1946  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1947  // If this resource has never been used, always return cycle zero.
1948  if (NextUnreserved == InvalidCycle)
1949  return 0;
1950  // For bottom-up scheduling add the cycles needed for the current operation.
1951  if (!isTop())
1952  NextUnreserved += Cycles;
1953  return NextUnreserved;
1954 }
1955 
1956 /// Compute the next cycle at which the given processor resource can be
1957 /// scheduled. Returns the next cycle and the index of the processor resource
1958 /// instance in the reserved cycles vector.
1959 std::pair<unsigned, unsigned>
1960 SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1961  unsigned MinNextUnreserved = InvalidCycle;
1962  unsigned InstanceIdx = 0;
1963  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1964  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1965  assert(NumberOfInstances > 0 &&
1966  "Cannot have zero instances of a ProcResource");
1967 
1968  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1969  ++I) {
1970  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1971  if (MinNextUnreserved > NextUnreserved) {
1972  InstanceIdx = I;
1973  MinNextUnreserved = NextUnreserved;
1974  }
1975  }
1976  return std::make_pair(MinNextUnreserved, InstanceIdx);
1977 }
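// Illustrative sketch (hypothetical numbers): for a resource kind with two
// units whose ReservedCycles entries are {5, InvalidCycle}, a top-down query
// returns {0, second unit}, since an instance that has never been used is
// free at cycle 0; if the entries were {5, 7}, it would return {5, first
// unit}, the earliest cycle at which any instance becomes available.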
1978 
1979 /// Does this SU have a hazard within the current instruction group.
1980 ///
1981 /// The scheduler supports two modes of hazard recognition. The first is the
1982 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1983 /// supports highly complicated in-order reservation tables
1984 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1985 ///
1986 /// The second is a streamlined mechanism that checks for hazards based on
1987 /// simple counters that the scheduler itself maintains. It explicitly checks
1988 /// for instruction dispatch limitations, including the number of micro-ops that
1989 /// can dispatch per cycle.
1990 ///
1991 /// TODO: Also check whether the SU must start a new group.
1992 bool SchedBoundary::checkHazard(SUnit *SU) {
1993  if (HazardRec->isEnabled()
1994  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1995  return true;
1996  }
1997 
1998  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1999  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2000  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2001  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2002  return true;
2003  }
2004 
2005  if (CurrMOps > 0 &&
2006  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2007  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2008  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2009  << (isTop() ? "begin" : "end") << " group\n");
2010  return true;
2011  }
2012 
2013  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2014  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2015  for (const MCWriteProcResEntry &PE :
2016  make_range(SchedModel->getWriteProcResBegin(SC),
2017  SchedModel->getWriteProcResEnd(SC))) {
2018  unsigned ResIdx = PE.ProcResourceIdx;
2019  unsigned Cycles = PE.Cycles;
2020  unsigned NRCycle, InstanceIdx;
2021  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2022  if (NRCycle > CurrCycle) {
2023 #ifndef NDEBUG
2024  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2025 #endif
2026  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2027  << SchedModel->getResourceName(ResIdx)
2028  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2029  << "=" << NRCycle << "c\n");
2030  return true;
2031  }
2032  }
2033  }
2034  return false;
2035 }
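// Illustrative sketch (hypothetical numbers): with an issue width of 4, if 3
// micro-ops have already issued in the current cycle (CurrMOps = 3) and the
// candidate expands to 2 micro-ops, checkHazard reports a hazard and the node
// is kept in the Pending queue until the cycle is bumped.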
2036 
2037 // Find the unscheduled node in ReadySUs with the highest latency.
2038 unsigned SchedBoundary::
2039 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2040  SUnit *LateSU = nullptr;
2041  unsigned RemLatency = 0;
2042  for (SUnit *SU : ReadySUs) {
2043  unsigned L = getUnscheduledLatency(SU);
2044  if (L > RemLatency) {
2045  RemLatency = L;
2046  LateSU = SU;
2047  }
2048  }
2049  if (LateSU) {
2050  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2051  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2052  }
2053  return RemLatency;
2054 }
2055 
2056 // Count resources in this zone and the remaining unscheduled
2057 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2058 // resource index, or zero if the zone is issue limited.
2059 unsigned SchedBoundary::
2060 getOtherResourceCount(unsigned &OtherCritIdx) {
2061  OtherCritIdx = 0;
2062  if (!SchedModel->hasInstrSchedModel())
2063  return 0;
2064 
2065  unsigned OtherCritCount = Rem->RemIssueCount
2066  + (RetiredMOps * SchedModel->getMicroOpFactor());
2067  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2068  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2069  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2070  PIdx != PEnd; ++PIdx) {
2071  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2072  if (OtherCount > OtherCritCount) {
2073  OtherCritCount = OtherCount;
2074  OtherCritIdx = PIdx;
2075  }
2076  }
2077  if (OtherCritIdx) {
2078  LLVM_DEBUG(
2079  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2080  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2081  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2082  }
2083  return OtherCritCount;
2084 }
2085 
2086 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2087  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2088 
2089 #ifndef NDEBUG
2090  // ReadyCycle was bumped up to the CurrCycle when this node was
2091  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2092  // scheduling, so may now be greater than ReadyCycle.
2093  if (ReadyCycle > CurrCycle)
2094  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2095 #endif
2096 
2097  if (ReadyCycle < MinReadyCycle)
2098  MinReadyCycle = ReadyCycle;
2099 
2100  // Check for interlocks first. For the purpose of other heuristics, an
2101  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2102  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2103  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2104  Available.size() >= ReadyListLimit)
2105  Pending.push(SU);
2106  else
2107  Available.push(SU);
2108 }
2109 
2110 /// Move the boundary of scheduled code by one cycle.
2111 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2112  if (SchedModel->getMicroOpBufferSize() == 0) {
2113  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2114  "MinReadyCycle uninitialized");
2115  if (MinReadyCycle > NextCycle)
2116  NextCycle = MinReadyCycle;
2117  }
2118  // Update the current micro-ops, which will issue in the next cycle.
2119  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2120  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2121 
2122  // Decrement DependentLatency based on the next cycle.
2123  if ((NextCycle - CurrCycle) > DependentLatency)
2124  DependentLatency = 0;
2125  else
2126  DependentLatency -= (NextCycle - CurrCycle);
2127 
2128  if (!HazardRec->isEnabled()) {
2129  // Bypass HazardRec virtual calls.
2130  CurrCycle = NextCycle;
2131  } else {
2132  // Bypass getHazardType calls in case of long latency.
2133  for (; CurrCycle != NextCycle; ++CurrCycle) {
2134  if (isTop())
2135  HazardRec->AdvanceCycle();
2136  else
2137  HazardRec->RecedeCycle();
2138  }
2139  }
2140  CheckPending = true;
2141  IsResourceLimited =
2142  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2143  getScheduledLatency(), true);
2144 
2145  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2146  << '\n');
2147 }
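// Illustrative sketch (hypothetical numbers): with an issue width of 4,
// advancing from cycle 10 to cycle 12 frees 8 micro-op slots, so CurrMOps = 5
// drops to 0, and a DependentLatency of 2 or less is fully absorbed by the
// two skipped cycles.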
2148 
2149 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2150  ExecutedResCounts[PIdx] += Count;
2151  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2152  MaxExecutedResCount = ExecutedResCounts[PIdx];
2153 }
2154 
2155 /// Add the given processor resource to this scheduled zone.
2156 ///
2157 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2158 /// during which this resource is consumed.
2159 ///
2160 /// \return the next cycle at which the instruction may execute without
2161 /// oversubscribing resources.
2162 unsigned SchedBoundary::
2163 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2164  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2165  unsigned Count = Factor * Cycles;
2166  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2167  << Cycles << "x" << Factor << "u\n");
2168 
2169  // Update Executed resources counts.
2170  incExecutedResources(PIdx, Count);
2171  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2172  Rem->RemainingCounts[PIdx] -= Count;
2173 
2174  // Check if this resource exceeds the current critical resource. If so, it
2175  // becomes the critical resource.
2176  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2177  ZoneCritResIdx = PIdx;
2178  LLVM_DEBUG(dbgs() << " *** Critical resource "
2179  << SchedModel->getResourceName(PIdx) << ": "
2180  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2181  << "c\n");
2182  }
2183  // For reserved resources, record the highest cycle using the resource.
2184  unsigned NextAvailable, InstanceIdx;
2185  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2186  if (NextAvailable > CurrCycle) {
2187  LLVM_DEBUG(dbgs() << " Resource conflict: "
2188  << SchedModel->getResourceName(PIdx)
2189  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2190  << " reserved until @" << NextAvailable << "\n");
2191  }
2192  return NextAvailable;
2193 }
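// Illustrative sketch (hypothetical numbers): if a resource kind has a
// resource factor of 3 and the instruction occupies it for Cycles = 2,
// countResource books 6 scaled units against ExecutedResCounts and the
// remaining counts, and the kind becomes the zone-critical resource as soon
// as its scaled count exceeds the current critical count.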
2194 
2195 /// Move the boundary of scheduled code by one SUnit.
2196 void SchedBoundary::bumpNode(SUnit *SU) {
2197  // Update the reservation table.
2198  if (HazardRec->isEnabled()) {
2199  if (!isTop() && SU->isCall) {
2200  // Calls are scheduled with their preceding instructions. For bottom-up
2201  // scheduling, clear the pipeline state before emitting.
2202  HazardRec->Reset();
2203  }
2204  HazardRec->EmitInstruction(SU);
2205  // Scheduling an instruction may have made pending instructions available.
2206  CheckPending = true;
2207  }
2208  // checkHazard should prevent scheduling multiple instructions per cycle that
2209  // exceed the issue width.
2210  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2211  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2212  assert(
2213  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2214  "Cannot schedule this instruction's MicroOps in the current cycle.");
2215 
2216  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2217  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2218 
2219  unsigned NextCycle = CurrCycle;
2220  switch (SchedModel->getMicroOpBufferSize()) {
2221  case 0:
2222  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2223  break;
2224  case 1:
2225  if (ReadyCycle > NextCycle) {
2226  NextCycle = ReadyCycle;
2227  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2228  }
2229  break;
2230  default:
2231  // We don't currently model the OOO reorder buffer, so consider all
2232  // scheduled MOps to be "retired". We do loosely model in-order resource
2233  // latency. If this instruction uses an in-order resource, account for any
2234  // likely stall cycles.
2235  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2236  NextCycle = ReadyCycle;
2237  break;
2238  }
2239  RetiredMOps += IncMOps;
2240 
2241  // Update resource counts and critical resource.
2242  if (SchedModel->hasInstrSchedModel()) {
2243  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2244  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2245  Rem->RemIssueCount -= DecRemIssue;
2246  if (ZoneCritResIdx) {
2247  // Scale scheduled micro-ops for comparing with the critical resource.
2248  unsigned ScaledMOps =
2249  RetiredMOps * SchedModel->getMicroOpFactor();
2250 
2251  // If scaled micro-ops are now more than the previous critical resource by
2252  // a full cycle, then micro-ops issue becomes critical.
2253  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2254  >= (int)SchedModel->getLatencyFactor()) {
2255  ZoneCritResIdx = 0;
2256  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2257  << ScaledMOps / SchedModel->getLatencyFactor()
2258  << "c\n");
2259  }
2260  }
2261  for (TargetSchedModel::ProcResIter
2262  PI = SchedModel->getWriteProcResBegin(SC),
2263  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2264  unsigned RCycle =
2265  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2266  if (RCycle > NextCycle)
2267  NextCycle = RCycle;
2268  }
2269  if (SU->hasReservedResource) {
2270  // For reserved resources, record the highest cycle using the resource.
2271  // For top-down scheduling, this is the cycle in which we schedule this
2272  // instruction plus the number of cycles the operation reserves the
2273  // resource. For bottom-up it is simply the instruction's cycle.
2274  for (TargetSchedModel::ProcResIter
2275  PI = SchedModel->getWriteProcResBegin(SC),
2276  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2277  unsigned PIdx = PI->ProcResourceIdx;
2278  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2279  unsigned ReservedUntil, InstanceIdx;
2280  std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2281  if (isTop()) {
2282  ReservedCycles[InstanceIdx] =
2283  std::max(ReservedUntil, NextCycle + PI->Cycles);
2284  } else
2285  ReservedCycles[InstanceIdx] = NextCycle;
2286  }
2287  }
2288  }
2289  }
2290  // Update ExpectedLatency and DependentLatency.
2291  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2292  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2293  if (SU->getDepth() > TopLatency) {
2294  TopLatency = SU->getDepth();
2295  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2296  << SU->NodeNum << ") " << TopLatency << "c\n");
2297  }
2298  if (SU->getHeight() > BotLatency) {
2299  BotLatency = SU->getHeight();
2300  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2301  << SU->NodeNum << ") " << BotLatency << "c\n");
2302  }
2303  // If we stall for any reason, bump the cycle.
2304  if (NextCycle > CurrCycle)
2305  bumpCycle(NextCycle);
2306  else
2307  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2308  // resource limited. If a stall occurred, bumpCycle does this.
2309  IsResourceLimited =
2310  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2311  getScheduledLatency(), true);
2312 
2313  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2314  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2315  // one cycle. Since we commonly reach the max MOps here, opportunistically
2316  // bump the cycle to avoid uselessly checking everything in the readyQ.
2317  CurrMOps += IncMOps;
2318 
2319  // Bump the cycle count for issue group constraints.
2320  // This must be done after NextCycle has been adjusted for all other stalls.
2321  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2322  // currCycle to X.
2323  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2324  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2325  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2326  << " group\n");
2327  bumpCycle(++NextCycle);
2328  }
2329 
2330  while (CurrMOps >= SchedModel->getIssueWidth()) {
2331  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2332  << CurrCycle << '\n');
2333  bumpCycle(++NextCycle);
2334  }
2335  LLVM_DEBUG(dumpScheduledState());
2336 }
2337 
2338 /// Release pending ready nodes into the available queue. This makes them
2339 /// visible to heuristics.
2340 void SchedBoundary::releasePending() {
2341  // If the available queue is empty, it is safe to reset MinReadyCycle.
2342  if (Available.empty())
2343  MinReadyCycle = std::numeric_limits<unsigned>::max();
2344 
2345  // Check to see if any of the pending instructions are ready to issue. If
2346  // so, add them to the available queue.
2347  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2348  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2349  SUnit *SU = *(Pending.begin()+i);
2350  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2351 
2352  if (ReadyCycle < MinReadyCycle)
2353  MinReadyCycle = ReadyCycle;
2354 
2355  if (!IsBuffered && ReadyCycle > CurrCycle)
2356  continue;
2357 
2358  if (checkHazard(SU))
2359  continue;
2360 
2361  if (Available.size() >= ReadyListLimit)
2362  break;
2363 
2364  Available.push(SU);
2365  Pending.remove(Pending.begin()+i);
2366  --i; --e;
2367  }
2368  CheckPending = false;
2369 }
2370 
2371 /// Remove SU from the ready set for this boundary.
2372 void SchedBoundary::removeReady(SUnit *SU) {
2373  if (Available.isInQueue(SU))
2374  Available.remove(Available.find(SU));
2375  else {
2376  assert(Pending.isInQueue(SU) && "bad ready count");
2377  Pending.remove(Pending.find(SU));
2378  }
2379 }
2380 
2381 /// If this queue only has one ready candidate, return it. As a side effect,
2382 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2383 /// one node is ready. If multiple instructions are ready, return NULL.
2384 SUnit *SchedBoundary::pickOnlyChoice() {
2385  if (CheckPending)
2386  releasePending();
2387 
2388  if (CurrMOps > 0) {
2389  // Defer any ready instrs that now have a hazard.
2390  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2391  if (checkHazard(*I)) {
2392  Pending.push(*I);
2393  I = Available.remove(I);
2394  continue;
2395  }
2396  ++I;
2397  }
2398  }
2399  for (unsigned i = 0; Available.empty(); ++i) {
2400 // FIXME: Re-enable assert once PR20057 is resolved.
2401 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2402 // "permanent hazard");
2403  (void)i;
2404  bumpCycle(CurrCycle + 1);
2405  releasePending();
2406  }
2407 
2408  LLVM_DEBUG(Pending.dump());
2409  LLVM_DEBUG(Available.dump());
2410 
2411  if (Available.size() == 1)
2412  return *Available.begin();
2413  return nullptr;
2414 }
2415 
2416 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2417 // This is useful information to dump after bumpNode.
2418 // Note that the Queue contents are more useful before pickNodeFromQueue.
2419 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2420  unsigned ResFactor;
2421  unsigned ResCount;
2422  if (ZoneCritResIdx) {
2423  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2424  ResCount = getResourceCount(ZoneCritResIdx);
2425  } else {
2426  ResFactor = SchedModel->getMicroOpFactor();
2427  ResCount = RetiredMOps * ResFactor;
2428  }
2429  unsigned LFactor = SchedModel->getLatencyFactor();
2430  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2431  << " Retired: " << RetiredMOps;
2432  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2433  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2434  << ResCount / ResFactor << " "
2435  << SchedModel->getResourceName(ZoneCritResIdx)
2436  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2437  << (IsResourceLimited ? " - Resource" : " - Latency")
2438  << " limited.\n";
2439 }
2440 #endif
2441 
2442 //===----------------------------------------------------------------------===//
2443 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2444 //===----------------------------------------------------------------------===//
2445 
2446 void GenericSchedulerBase::SchedCandidate::
2447 initResourceDelta(const ScheduleDAGMI *DAG,
2448  const TargetSchedModel *SchedModel) {
2449  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2450  return;
2451 
2452  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2453  for (TargetSchedModel::ProcResIter
2454  PI = SchedModel->getWriteProcResBegin(SC),
2455  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2456  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2457  ResDelta.CritResources += PI->Cycles;
2458  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2459  ResDelta.DemandedResources += PI->Cycles;
2460  }
2461 }
2462 
2463 /// Compute remaining latency. We need this both to determine whether the
2464 /// overall schedule has become latency-limited and whether the instructions
2465 /// outside this zone are resource or latency limited.
2466 ///
2467 /// The "dependent" latency is updated incrementally during scheduling as the
2468 /// max height/depth of scheduled nodes minus the cycles since it was
2469 /// scheduled:
2470 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2471 ///
2472 /// The "independent" latency is the max ready queue depth:
2473 /// ILat = max N.depth for N in Available|Pending
2474 ///
2475 /// RemainingLatency is the greater of independent and dependent latency.
2476 ///
2477 /// These computations are expensive, especially in DAGs with many edges, so
2478 /// only do them if necessary.
2479 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2480  unsigned RemLatency = CurrZone.getDependentLatency();
2481  RemLatency = std::max(RemLatency,
2482  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2483  RemLatency = std::max(RemLatency,
2484  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2485  return RemLatency;
2486 }
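// Illustrative sketch (hypothetical numbers): if the zone's dependent latency
// is 4 cycles, the deepest node still in Available has 6 cycles of
// unscheduled latency, and the deepest node in Pending has 3, the remaining
// latency is max(4, 6, 3) = 6 cycles.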
2487 
2488 /// Returns true if the current cycle plus remaining latency is greater than
2489 /// the critical path in the scheduling region.
2490 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2491  SchedBoundary &CurrZone,
2492  bool ComputeRemLatency,
2493  unsigned &RemLatency) const {
2494  // The current cycle is already greater than the critical path, so we are
2495  // already latency limited and don't need to compute the remaining latency.
2496  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2497  return true;
2498 
2499  // If we haven't scheduled anything yet, then we aren't latency limited.
2500  if (CurrZone.getCurrCycle() == 0)
2501  return false;
2502 
2503  if (ComputeRemLatency)
2504  RemLatency = computeRemLatency(CurrZone);
2505 
2506  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2507 }
2508 
2509 /// Set the CandPolicy for a scheduling zone given the current resources and
2510 /// latencies inside and outside the zone.
2511 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2512  SchedBoundary &CurrZone,
2513  SchedBoundary *OtherZone) {
2514  // Apply preemptive heuristics based on the total latency and resources
2515  // inside and outside this zone. Potential stalls should be considered before
2516  // following this policy.
2517 
2518  // Compute the critical resource outside the zone.
2519  unsigned OtherCritIdx = 0;
2520  unsigned OtherCount =
2521  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2522 
2523  bool OtherResLimited = false;
2524  unsigned RemLatency = 0;
2525  bool RemLatencyComputed = false;
2526  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2527  RemLatency = computeRemLatency(CurrZone);
2528  RemLatencyComputed = true;
2529  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2530  OtherCount, RemLatency, false);
2531  }
2532 
2533  // Schedule aggressively for latency in PostRA mode. We don't check for
2534  // acyclic latency during PostRA, and highly out-of-order processors will
2535  // skip PostRA scheduling.
2536  if (!OtherResLimited &&
2537  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2538  RemLatency))) {
2539  Policy.ReduceLatency |= true;
2540  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2541  << " RemainingLatency " << RemLatency << " + "
2542  << CurrZone.getCurrCycle() << "c > CritPath "
2543  << Rem.CriticalPath << "\n");
2544  }
2545  // If the same resource is limiting inside and outside the zone, do nothing.
2546  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2547  return;
2548 
2549  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2550  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2551  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2552  } if (OtherResLimited) dbgs()
2553  << " RemainingLimit: "
2554  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2555  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2556  << " Latency limited both directions.\n");
2557 
2558  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2559  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2560 
2561  if (OtherResLimited)
2562  Policy.DemandResIdx = OtherCritIdx;
2563 }
2564 
2565 #ifndef NDEBUG
2566 const char *GenericSchedulerBase::getReasonStr(
2567  GenericSchedulerBase::CandReason Reason) {
2568  switch (Reason) {
2569  case NoCand: return "NOCAND ";
2570  case Only1: return "ONLY1 ";
2571  case PhysReg: return "PHYS-REG ";
2572  case RegExcess: return "REG-EXCESS";
2573  case RegCritical: return "REG-CRIT ";
2574  case Stall: return "STALL ";
2575  case Cluster: return "CLUSTER ";
2576  case Weak: return "WEAK ";
2577  case RegMax: return "REG-MAX ";
2578  case ResourceReduce: return "RES-REDUCE";
2579  case ResourceDemand: return "RES-DEMAND";
2580  case TopDepthReduce: return "TOP-DEPTH ";
2581  case TopPathReduce: return "TOP-PATH ";
2582  case BotHeightReduce:return "BOT-HEIGHT";
2583  case BotPathReduce: return "BOT-PATH ";
2584  case NextDefUse: return "DEF-USE ";
2585  case NodeOrder: return "ORDER ";
2586  };
2587  llvm_unreachable("Unknown reason!");
2588 }
2589 
2590 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2591  PressureChange P;
2592  unsigned ResIdx = 0;
2593  unsigned Latency = 0;
2594  switch (Cand.Reason) {
2595  default:
2596  break;
2597  case RegExcess:
2598  P = Cand.RPDelta.Excess;
2599  break;
2600  case RegCritical:
2601  P = Cand.RPDelta.CriticalMax;
2602  break;
2603  case RegMax:
2604  P = Cand.RPDelta.CurrentMax;
2605  break;
2606  case ResourceReduce:
2607  ResIdx = Cand.Policy.ReduceResIdx;
2608  break;
2609  case ResourceDemand:
2610  ResIdx = Cand.Policy.DemandResIdx;
2611  break;
2612  case TopDepthReduce:
2613  Latency = Cand.SU->getDepth();
2614  break;
2615  case TopPathReduce:
2616  Latency = Cand.SU->getHeight();
2617  break;
2618  case BotHeightReduce:
2619  Latency = Cand.SU->getHeight();
2620  break;
2621  case BotPathReduce:
2622  Latency = Cand.SU->getDepth();
2623  break;
2624  }
2625  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2626  if (P.isValid())
2627  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2628  << ":" << P.getUnitInc() << " ";
2629  else
2630  dbgs() << " ";
2631  if (ResIdx)
2632  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2633  else
2634  dbgs() << " ";
2635  if (Latency)
2636  dbgs() << " " << Latency << " cycles ";
2637  else
2638  dbgs() << " ";
2639  dbgs() << '\n';
2640 }
2641 #endif
2642 
2643 namespace llvm {
2644 /// Return true if this heuristic determines order.
2645 bool tryLess(int TryVal, int CandVal,
2646  GenericSchedulerBase::SchedCandidate &TryCand,
2647  GenericSchedulerBase::SchedCandidate &Cand,
2648  GenericSchedulerBase::CandReason Reason) {
2649  if (TryVal < CandVal) {
2650  TryCand.Reason = Reason;
2651  return true;
2652  }
2653  if (TryVal > CandVal) {
2654  if (Cand.Reason > Reason)
2655  Cand.Reason = Reason;
2656  return true;
2657  }
2658  return false;
2659 }
2660 
2661 bool tryGreater(int TryVal, int CandVal,
2662  GenericSchedulerBase::SchedCandidate &TryCand,
2663  GenericSchedulerBase::SchedCandidate &Cand,
2664  GenericSchedulerBase::CandReason Reason) {
2665  if (TryVal > CandVal) {
2666  TryCand.Reason = Reason;
2667  return true;
2668  }
2669  if (TryVal < CandVal) {
2670  if (Cand.Reason > Reason)
2671  Cand.Reason = Reason;
2672  return true;
2673  }
2674  return false;
2675 }
2676 
2677 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2678  GenericSchedulerBase::SchedCandidate &Cand,
2679  SchedBoundary &Zone) {
2680  if (Zone.isTop()) {
2681  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2682  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2683  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2684  return true;
2685  }
2686  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2687  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2688  return true;
2689  } else {
2690  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2691  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2692  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2693  return true;
2694  }
2695  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2696  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2697  return true;
2698  }
2699  return false;
2700 }
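// Illustrative sketch: in the top zone, when the incumbent candidate's depth
// already exceeds the zone's scheduled latency (it would stall), the
// shallower candidate wins with TOP-DEPTH; otherwise the candidate with the
// greater height, i.e. the longer remaining path to the exit, wins with
// TOP-PATH. The bottom zone applies the mirrored checks.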
2701 } // end namespace llvm
2702 
2703 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2704  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2705  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2706 }
2707 
2708 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2709  tracePick(Cand.Reason, Cand.AtTop);
2710 }
2711 
2712 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2713  assert(dag->hasVRegLiveness() &&
2714  "(PreRA)GenericScheduler needs vreg liveness");
2715  DAG = static_cast<ScheduleDAGMILive*>(dag);
2716  SchedModel = DAG->getSchedModel();
2717  TRI = DAG->TRI;
2718 
2719  Rem.init(DAG, SchedModel);
2720  Top.init(DAG, SchedModel, &Rem);
2721  Bot.init(DAG, SchedModel, &Rem);
2722 
2723  // Initialize resource counts.
2724 
2725  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2726  // are disabled, then these HazardRecs will be disabled.
2727  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2728  if (!Top.HazardRec) {
2729  Top.HazardRec =
2730  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2731  Itin, DAG);
2732  }
2733  if (!Bot.HazardRec) {
2734  Bot.HazardRec =
2735  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2736  Itin, DAG);
2737  }
2738  TopCand.SU = nullptr;
2739  BotCand.SU = nullptr;
2740 }
2741 
2742 /// Initialize the per-region scheduling policy.
2743 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2744  MachineBasicBlock::iterator End,
2745  unsigned NumRegionInstrs) {
2746  const MachineFunction &MF = *Begin->getMF();
2747  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2748 
2749  // Avoid setting up the register pressure tracker for small regions to save
2750  // compile time. As a rough heuristic, only track pressure when the number of
2751  // schedulable instructions exceeds half the integer register file.
2752  RegionPolicy.ShouldTrackPressure = true;
2753  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2754  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2755  if (TLI->isTypeLegal(LegalIntVT)) {
2756  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2757  TLI->getRegClassFor(LegalIntVT));
2758  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2759  }
2760  }
2761 
2762  // For generic targets, we default to bottom-up, because it's simpler and more
2763  // compile-time optimizations have been implemented in that direction.
2764  RegionPolicy.OnlyBottomUp = true;
2765 
2766  // Allow the subtarget to override default policy.
2767  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2768 
2769  // After subtarget overrides, apply command line options.
2770  if (!EnableRegPressure) {
2771  RegionPolicy.ShouldTrackPressure = false;
2772  RegionPolicy.ShouldTrackLaneMasks = false;
2773  }
2774 
2775  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2776  // e.g. -misched-bottomup=false allows scheduling in both directions.
2777  assert((!ForceTopDown || !ForceBottomUp) &&
2778  "-misched-topdown incompatible with -misched-bottomup");
2779  if (ForceBottomUp.getNumOccurrences() > 0) {
2780  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2781  if (RegionPolicy.OnlyBottomUp)
2782  RegionPolicy.OnlyTopDown = false;
2783  }
2784  if (ForceTopDown.getNumOccurrences() > 0) {
2785  RegionPolicy.OnlyTopDown = ForceTopDown;
2786  if (RegionPolicy.OnlyTopDown)
2787  RegionPolicy.OnlyBottomUp = false;
2788  }
2789 }
2790 
2791 void GenericScheduler::dumpPolicy() const {
2792  // Cannot completely remove virtual function even in release mode.
2793 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2794  dbgs() << "GenericScheduler RegionPolicy: "
2795  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2796  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2797  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2798  << "\n";
2799 #endif
2800 }
2801 
2802 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2803 /// critical path by more cycles than it takes to drain the instruction buffer.
2804 /// We estimate an upper bound on in-flight instructions as:
2805 ///
2806 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2807 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2808 /// InFlightResources = InFlightIterations * LoopResources
2809 ///
2810 /// TODO: Check execution resources in addition to IssueCount.
2811 void GenericScheduler::checkAcyclicLatency() {
2812  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2813  return;
2814 
2815  // Scaled number of cycles per loop iteration.
2816  unsigned IterCount =
2817  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2818  Rem.RemIssueCount);
2819  // Scaled acyclic critical path.
2820  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2821  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2822  unsigned InFlightCount =
2823  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2824  unsigned BufferLimit =
2825  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2826 
2827  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2828 
2829  LLVM_DEBUG(
2830  dbgs() << "IssueCycles="
2831  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2832  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2833  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2834  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2835  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2836  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2837 }
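// Illustrative sketch (hypothetical numbers, assuming latency and micro-op
// factors of 1): with a cyclic critical path of 4 cycles, 6 micro-ops issued
// per iteration and an acyclic critical path of 24 cycles, CyclesPerIteration
// = max(4, 6) = 6, so roughly 24 / 6 = 4 iterations (about 24 micro-ops) are
// in flight; against a 16-entry micro-op buffer this sets
// IsAcyclicLatencyLimited.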
2838 
2839 void GenericScheduler::registerRoots() {
2840  Rem.CriticalPath = DAG->ExitSU.getDepth();
2841 
2842  // Some roots may not feed into ExitSU. Check all of them in case.
2843  for (const SUnit *SU : Bot.Available) {
2844  if (SU->getDepth() > Rem.CriticalPath)
2845  Rem.CriticalPath = SU->getDepth();
2846  }
2847  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2848  if (DumpCriticalPathLength) {
2849  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2850  }
2851 
2852  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2853  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2854  checkAcyclicLatency();
2855  }
2856 }
2857 
2858 namespace llvm {
2859 bool tryPressure(const PressureChange &TryP,
2860  const PressureChange &CandP,
2861  GenericSchedulerBase::SchedCandidate &TryCand,
2862  GenericSchedulerBase::SchedCandidate &Cand,
2863  GenericSchedulerBase::CandReason Reason,
2864  const TargetRegisterInfo *TRI,
2865  const MachineFunction &MF) {
2866  // If one candidate decreases and the other increases, go with it.
2867  // Invalid candidates have UnitInc==0.
2868  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2869  Reason)) {
2870  return true;
2871  }
2872  // Do not compare the magnitude of pressure changes between top and bottom
2873  // boundary.
2874  if (Cand.AtTop != TryCand.AtTop)
2875  return false;
2876 
2877  // If both candidates affect the same set in the same boundary, go with the
2878  // smallest increase.
2879  unsigned TryPSet = TryP.getPSetOrMax();
2880  unsigned CandPSet = CandP.getPSetOrMax();
2881  if (TryPSet == CandPSet) {
2882  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2883  Reason);
2884  }
2885 
2886  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2887  std::numeric_limits<int>::max();
2888 
2889  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2890  std::numeric_limits<int>::max();
2891 
2892  // If the candidates are decreasing pressure, reverse priority.
2893  if (TryP.getUnitInc() < 0)
2894  std::swap(TryRank, CandRank);
2895  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2896 }
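// Illustrative sketch (hypothetical values): if TryCand decreases pressure in
// some set (UnitInc = -1) while Cand increases it (UnitInc = +2), TryCand wins
// with the given Reason; if both touch the same set on the same boundary, the
// smaller increase wins; otherwise the per-target pressure-set score breaks
// the tie, with priority reversed when the candidates are decreasing pressure.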
2897 
2898 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2899  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2900 }
2901 
2902 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2903 /// their physreg def/use.
2904 ///
2905 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2906 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2907 /// with the operation that produces or consumes the physreg. We'll do this when
2908 /// regalloc has support for parallel copies.
2909 int biasPhysReg(const SUnit *SU, bool isTop) {
2910  const MachineInstr *MI = SU->getInstr();
2911 
2912  if (MI->isCopy()) {
2913  unsigned ScheduledOper = isTop ? 1 : 0;
2914  unsigned UnscheduledOper = isTop ? 0 : 1;
2915  // If we have already scheduled the physreg produce/consumer, immediately
2916  // schedule the copy.
2917  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
2918  return 1;
2919  // If the physreg is at the boundary, defer it. Otherwise schedule it
2920  // immediately to free the dependent. We can hoist the copy later.
2921  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2922  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
2923  return AtBoundary ? -1 : 1;
2924  }
2925 
2926  if (MI->isMoveImmediate()) {
2927  // If we have a move immediate and all successors have been assigned, bias
2928  // towards scheduling this later. Make sure all register defs are to
2929  // physical registers.
2930  bool DoBias = true;
2931  for (const MachineOperand &Op : MI->defs()) {
2932  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
2933  DoBias = false;
2934  break;
2935  }
2936  }
2937 
2938  if (DoBias)
2939  return isTop ? -1 : 1;
2940  }
2941 
2942  return 0;
2943 }
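// Illustrative sketch: when scheduling top-down, a COPY whose source
// (operand 1) is a physical register returns 1, pulling it right next to the
// physreg def; if instead its destination is the physreg, it returns -1
// (defer) while the copy has no remaining successors, i.e. it sits at the
// region boundary, and 1 otherwise. Move-immediates whose defs are all
// physregs are biased to be scheduled late.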
2944 } // end namespace llvm
2945 
2946 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2947  bool AtTop,
2948  const RegPressureTracker &RPTracker,
2949  RegPressureTracker &TempTracker) {
2950  Cand.SU = SU;
2951  Cand.AtTop = AtTop;
2952  if (DAG->isTrackingPressure()) {
2953  if (AtTop) {
2954  TempTracker.getMaxDownwardPressureDelta(
2955  Cand.SU->getInstr(),
2956  Cand.RPDelta,
2957  DAG->getRegionCriticalPSets(),
2958  DAG->getRegPressure().MaxSetPressure);
2959  } else {
2960  if (VerifyScheduling) {
2961  TempTracker.getMaxUpwardPressureDelta(
2962  Cand.SU->getInstr(),
2963  &DAG->getPressureDiff(Cand.SU),
2964  Cand.RPDelta,
2965  DAG->getRegionCriticalPSets(),
2966  DAG->getRegPressure().MaxSetPressure);
2967  } else {
2968  RPTracker.getUpwardPressureDelta(
2969  Cand.SU->getInstr(),
2970  DAG->getPressureDiff(Cand.SU),
2971  Cand.RPDelta,
2972  DAG->getRegionCriticalPSets(),
2973  DAG->getRegPressure().MaxSetPressure);
2974  }
2975  }
2976  }
2977  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2978  << " Try SU(" << Cand.SU->NodeNum << ") "
2979  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2980  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2981 }
2982 
2983 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2984 /// hierarchical. This may be more efficient than a graduated cost model because
2985 /// we don't need to evaluate all aspects of the model for each node in the
2986 /// queue. But it's really done to make the heuristics easier to debug and
2987 /// statistically analyze.
2988 ///
2989 /// \param Cand provides the policy and current best candidate.
2990 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2991 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2992 /// if Cand is from a different zone than TryCand.
2993 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2994  SchedCandidate &TryCand,
2995  SchedBoundary *Zone) const {
2996  // Initialize the candidate if needed.
2997  if (!Cand.isValid()) {
2998  TryCand.Reason = NodeOrder;
2999  return;
3000  }
3001 
3002  // Bias PhysReg Defs and copies to their uses and defs, respectively.
3003  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3004  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3005  return;
3006 
3007  // Avoid exceeding the target's limit.
3008  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3009  Cand.RPDelta.Excess,
3010  TryCand, Cand, RegExcess, TRI,
3011  DAG->MF))
3012  return;
3013 
3014  // Avoid increasing the max critical pressure in the scheduled region.
3015  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3016  Cand.RPDelta.CriticalMax,
3017  TryCand, Cand, RegCritical, TRI,
3018  DAG->MF))
3019  return;
3020 
3021  // We only compare a subset of features when comparing nodes between
3022  // Top and Bottom boundary. Some properties are simply incomparable; in many
3023  // other instances we should only override the other boundary if something
3024  // is a clear good pick on one boundary. Skip heuristics that are more
3025  // "tie-breaking" in nature.
3026  bool SameBoundary = Zone != nullptr;
3027  if (SameBoundary) {
3028  // For loops that are acyclic path limited, aggressively schedule for
3029  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3030  // heuristics to take precedence.
3031  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3032  tryLatency(TryCand, Cand, *Zone))
3033  return;
3034 
3035  // Prioritize instructions that read unbuffered resources by stall cycles.
3036  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3037  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3038  return;
3039  }
3040 
3041  // Keep clustered nodes together to encourage downstream peephole
3042  // optimizations which may reduce resource requirements.
3043  //
3044  // This is a best effort to set things up for a post-RA pass. Optimizations
3045  // like generating loads of multiple registers should ideally be done within
3046  // the scheduler pass by combining the loads during DAG postprocessing.
3047  const SUnit *CandNextClusterSU =
3048  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3049  const SUnit *TryCandNextClusterSU =
3050  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3051  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3052  Cand.SU == CandNextClusterSU,
3053  TryCand, Cand, Cluster))
3054  return;
3055 
3056  if (SameBoundary) {
3057  // Weak edges are for clustering and other constraints.
3058  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3059  getWeakLeft(Cand.SU, Cand.AtTop),
3060  TryCand, Cand, Weak))
3061  return;
3062  }
3063 
3064  // Avoid increasing the max pressure of the entire region.
3065  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3066  Cand.RPDelta.CurrentMax,
3067  TryCand, Cand, RegMax, TRI,
3068  DAG->MF))
3069  return;
3070 
3071  if (SameBoundary) {
3072  // Avoid critical resource consumption and balance the schedule.
3073  TryCand.initResourceDelta(DAG, SchedModel);
3074  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3075  TryCand, Cand, ResourceReduce))
3076  return;
3077  if (tryGreater(TryCand.ResDelta.DemandedResources,
3078  Cand.ResDelta.DemandedResources,
3079  TryCand, Cand, ResourceDemand))
3080  return;
3081 
3082  // Avoid serializing long latency dependence chains.
3083  // For acyclic path limited loops, latency was already checked above.
3084  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3085  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3086  return;
3087 
3088  // Fall through to original instruction order.
3089  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3090  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3091  TryCand.Reason = NodeOrder;
3092  }
3093  }
3094 }
3095 
3096 /// Pick the best candidate from the queue.
3097 ///
3098 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3099 /// DAG building. To adjust for the current scheduling location we need to
3100 /// maintain the number of vreg uses remaining to be top-scheduled.
3101 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3102  const CandPolicy &ZonePolicy,
3103  const RegPressureTracker &RPTracker,
3104  SchedCandidate &Cand) {
3105  // getMaxPressureDelta temporarily modifies the tracker.
3106  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3107 
3108  ReadyQueue &Q = Zone.Available;
3109  for (SUnit *SU : Q) {
3110 
3111  SchedCandidate TryCand(ZonePolicy);
3112  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3113  // Pass SchedBoundary only when comparing nodes from the same boundary.
3114  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3115  tryCandidate(Cand, TryCand, ZoneArg);
3116  if (TryCand.Reason != NoCand) {
3117  // Initialize resource delta if needed in case future heuristics query it.
3118  if (TryCand.ResDelta == SchedResourceDelta())
3119  TryCand.initResourceDelta(DAG, SchedModel);
3120  Cand.setBest(TryCand);
3121  LLVM_DEBUG(traceCandidate(Cand));
3122  }
3123  }
3124 }
3125 
3126 /// Pick the best candidate node from either the top or bottom queue.
3127 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3128  // Schedule as far as possible in the direction of no choice. This is most
3129  // efficient, but also provides the best heuristics for CriticalPSets.
3130  if (SUnit *SU = Bot.pickOnlyChoice()) {
3131  IsTopNode = false;
3132  tracePick(Only1, false);
3133  return SU;
3134  }
3135  if (SUnit *SU = Top.pickOnlyChoice()) {
3136  IsTopNode = true;
3137  tracePick(Only1, true);
3138  return SU;
3139  }
3140  // Set the bottom-up policy based on the state of the current bottom zone and
3141  // the instructions outside the zone, including the top zone.
3142  CandPolicy BotPolicy;
3143  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3144  // Set the top-down policy based on the state of the current top zone and
3145  // the instructions outside the zone, including the bottom zone.
3146  CandPolicy TopPolicy;
3147  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3148 
3149  // See if BotCand is still valid (because we previously scheduled from Top).
3150  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3151  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3152  BotCand.Policy != BotPolicy) {
3153  BotCand.reset(CandPolicy());
3154  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3155  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3156  } else {
3157  LLVM_DEBUG(traceCandidate(BotCand));
3158 #ifndef NDEBUG
3159  if (VerifyScheduling) {
3160  SchedCandidate TCand;
3161  TCand.reset(CandPolicy());
3162  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3163  assert(TCand.SU == BotCand.SU &&
3164  "Last pick result should correspond to re-picking right now");
3165  }
3166 #endif
3167  }
3168 
3169  // Check if the top Q has a better candidate.
3170  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3171  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3172  TopCand.Policy != TopPolicy) {
3173  TopCand.reset(CandPolicy());
3174  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3175  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3176  } else {
3177  LLVM_DEBUG(traceCandidate(TopCand));
3178 #ifndef NDEBUG
3179  if (VerifyScheduling) {
3180  SchedCandidate TCand;
3181  TCand.reset(CandPolicy());
3182  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3183  assert(TCand.SU == TopCand.SU &&
3184  "Last pick result should correspond to re-picking right now");
3185  }
3186 #endif
3187  }
3188 
3189  // Pick best from BotCand and TopCand.
3190  assert(BotCand.isValid());
3191  assert(TopCand.isValid());
3192  SchedCandidate Cand = BotCand;
3193  TopCand.Reason = NoCand;
3194  tryCandidate(Cand, TopCand, nullptr);
3195  if (TopCand.Reason != NoCand) {
3196  Cand.setBest(TopCand);
3197  LLVM_DEBUG(traceCandidate(Cand));
3198  }
3199 
3200  IsTopNode = Cand.AtTop;
3201  tracePick(Cand);
3202  return Cand.SU;
3203 }
3204 
3205 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
 3206 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
 3207  if (DAG->top() == DAG->bottom()) {
3208  assert(Top.Available.empty() && Top.Pending.empty() &&
3209  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3210  return nullptr;
3211  }
3212  SUnit *SU;
3213  do {
3214  if (RegionPolicy.OnlyTopDown) {
3215  SU = Top.pickOnlyChoice();
3216  if (!SU) {
3217  CandPolicy NoPolicy;
3218  TopCand.reset(NoPolicy);
3219  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3220  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3221  tracePick(TopCand);
3222  SU = TopCand.SU;
3223  }
3224  IsTopNode = true;
3225  } else if (RegionPolicy.OnlyBottomUp) {
3226  SU = Bot.pickOnlyChoice();
3227  if (!SU) {
3228  CandPolicy NoPolicy;
3229  BotCand.reset(NoPolicy);
3230  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3231  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3232  tracePick(BotCand);
3233  SU = BotCand.SU;
3234  }
3235  IsTopNode = false;
3236  } else {
3237  SU = pickNodeBidirectional(IsTopNode);
3238  }
3239  } while (SU->isScheduled);
3240 
3241  if (SU->isTopReady())
3242  Top.removeReady(SU);
3243  if (SU->isBottomReady())
3244  Bot.removeReady(SU);
3245 
3246  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3247  << *SU->getInstr());
3248  return SU;
3249 }
3250 
 3251 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
 3252  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3253  if (!isTop)
3254  ++InsertPos;
3255  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3256 
3257  // Find already scheduled copies with a single physreg dependence and move
3258  // them just above the scheduled instruction.
3259  for (SDep &Dep : Deps) {
3260  if (Dep.getKind() != SDep::Data ||
3261  !Register::isPhysicalRegister(Dep.getReg()))
3262  continue;
3263  SUnit *DepSU = Dep.getSUnit();
3264  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3265  continue;
3266  MachineInstr *Copy = DepSU->getInstr();
3267  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3268  continue;
3269  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3270  DAG->dumpNode(*Dep.getSUnit()));
3271  DAG->moveInstruction(Copy, InsertPos);
3272  }
3273 }
3274 
3275 /// Update the scheduler's state after scheduling a node. This is the same node
3276 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
 3277 /// update its state based on the current cycle before MachineSchedStrategy
3278 /// does.
3279 ///
3280 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3281 /// them here. See comments in biasPhysReg.
3282 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3283  if (IsTopNode) {
3284  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3285  Top.bumpNode(SU);
3286  if (SU->hasPhysRegUses)
3287  reschedulePhysReg(SU, true);
3288  } else {
3289  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3290  Bot.bumpNode(SU);
3291  if (SU->hasPhysRegDefs)
3292  reschedulePhysReg(SU, false);
3293  }
3294 }
3295 
3296 /// Create the standard converging machine scheduler. This will be used as the
3297 /// default scheduler if the target does not set a default.
 3298 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
 3299  ScheduleDAGMILive *DAG =
3300  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3301  // Register DAG post-processors.
3302  //
3303  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3304  // data and pass it to later mutations. Have a single mutation that gathers
3305  // the interesting nodes in one pass.
 3306  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
 3307  return DAG;
3308 }
3309 
 3310 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
 3311  return createGenericSchedLive(C);
3312 }
3313 
3314 static MachineSchedRegistry
 3315 GenericSchedRegistry("converge", "Standard converging scheduler.",
 3316  createConveringSched);
3317 
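// Strategies registered this way become selectable by name through the
// -misched command line option (MachineSchedOpt, defined earlier in this
// file); for example, a run such as
//
//   llc -misched=converge input.ll
//
// would force the standard converging scheduler registered above.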
3318 //===----------------------------------------------------------------------===//
3319 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3320 //===----------------------------------------------------------------------===//
3321 
 3322 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
 3323  DAG = Dag;
3324  SchedModel = DAG->getSchedModel();
3325  TRI = DAG->TRI;
3326 
3327  Rem.init(DAG, SchedModel);
3328  Top.init(DAG, SchedModel, &Rem);
3329  BotRoots.clear();
3330 
3331  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3332  // or are disabled, then these HazardRecs will be disabled.
3333  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3334  if (!Top.HazardRec) {
3335  Top.HazardRec =
 3336  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
 3337  Itin, DAG);
3338  }
3339 }
3340 
 3341 void PostGenericScheduler::registerRoots() {
 3342  Rem.CriticalPath = DAG->ExitSU.getDepth();
3343 
3344  // Some roots may not feed into ExitSU. Check all of them in case.
3345  for (const SUnit *SU : BotRoots) {
3346  if (SU->getDepth() > Rem.CriticalPath)
3347  Rem.CriticalPath = SU->getDepth();
3348  }
3349  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3350  if (DumpCriticalPathLength) {
3351  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3352  }
3353 }
3354 
3355 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3356 ///
3357 /// \param Cand provides the policy and current best candidate.
3358 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
 3359 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
 3360  SchedCandidate &TryCand) {
3361  // Initialize the candidate if needed.
3362  if (!Cand.isValid()) {
3363  TryCand.Reason = NodeOrder;
3364  return;
3365  }
3366 
3367  // Prioritize instructions that read unbuffered resources by stall cycles.
3368  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3369  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3370  return;
3371 
3372  // Keep clustered nodes together.
3373  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3374  Cand.SU == DAG->getNextClusterSucc(),
3375  TryCand, Cand, Cluster))
3376  return;
3377 
3378  // Avoid critical resource consumption and balance the schedule.
3379  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3380  TryCand, Cand, ResourceReduce))
3381  return;
 3382  if (tryGreater(TryCand.ResDelta.DemandedResources,
 3383  Cand.ResDelta.DemandedResources,
3384  TryCand, Cand, ResourceDemand))
3385  return;
3386 
3387  // Avoid serializing long latency dependence chains.
3388  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3389  return;
3390  }
3391 
3392  // Fall through to original instruction order.
3393  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3394  TryCand.Reason = NodeOrder;
3395 }
3396 
 3397 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
 3398  ReadyQueue &Q = Top.Available;
3399  for (SUnit *SU : Q) {
3400  SchedCandidate TryCand(Cand.Policy);
3401  TryCand.SU = SU;
3402  TryCand.AtTop = true;
3403  TryCand.initResourceDelta(DAG, SchedModel);
3404  tryCandidate(Cand, TryCand);
3405  if (TryCand.Reason != NoCand) {
3406  Cand.setBest(TryCand);
3407  LLVM_DEBUG(traceCandidate(Cand));
3408  }
3409  }
3410 }
3411 
3412 /// Pick the next node to schedule.
 3413 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
 3414  if (DAG->top() == DAG->bottom()) {
3415  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3416  return nullptr;
3417  }
3418  SUnit *SU;
3419  do {
3420  SU = Top.pickOnlyChoice();
3421  if (SU) {
3422  tracePick(Only1, true);
3423  } else {
3424  CandPolicy NoPolicy;
3425  SchedCandidate TopCand(NoPolicy);
3426  // Set the top-down policy based on the state of the current top zone and
3427  // the instructions outside the zone, including the bottom zone.
3428  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3429  pickNodeFromQueue(TopCand);
3430  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3431  tracePick(TopCand);
3432  SU = TopCand.SU;
3433  }
3434  } while (SU->isScheduled);
3435 
3436  IsTopNode = true;
3437  Top.removeReady(SU);
3438 
3439  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3440  << *SU->getInstr());
3441  return SU;
3442 }
3443 
3444 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3445 /// scheduled/remaining flags in the DAG nodes.
3446 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3447  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3448  Top.bumpNode(SU);
3449 }
3450 
 3451 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
 3452  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3453  /*RemoveKillFlags=*/true);
3454 }
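// A target can also hand this strategy out from its pass configuration. As a
// rough sketch (MyTargetPassConfig is a hypothetical class name), the
// TargetPassConfig hook could simply reuse the generic factory:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return createGenericSchedPostRA(C);
//   }
//
// When that hook returns nullptr, the PostMachineScheduler pass falls back to
// this generic factory on its own.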
3455 
3456 //===----------------------------------------------------------------------===//
3457 // ILP Scheduler. Currently for experimental analysis of heuristics.
3458 //===----------------------------------------------------------------------===//
3459 
3460 namespace {
3461 
3462 /// Order nodes by the ILP metric.
3463 struct ILPOrder {
3464  const SchedDFSResult *DFSResult = nullptr;
3465  const BitVector *ScheduledTrees = nullptr;
3466  bool MaximizeILP;
3467 
3468  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3469 
3470  /// Apply a less-than relation on node priority.
3471  ///
3472  /// (Return true if A comes after B in the Q.)
3473  bool operator()(const SUnit *A, const SUnit *B) const {
3474  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3475  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3476  if (SchedTreeA != SchedTreeB) {
3477  // Unscheduled trees have lower priority.
3478  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3479  return ScheduledTrees->test(SchedTreeB);
3480 
 3481  // Trees with shallower connections have lower priority.
3482  if (DFSResult->getSubtreeLevel(SchedTreeA)
3483  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3484  return DFSResult->getSubtreeLevel(SchedTreeA)
3485  < DFSResult->getSubtreeLevel(SchedTreeB);
3486  }
3487  }
3488  if (MaximizeILP)
3489  return DFSResult->getILP(A) < DFSResult->getILP(B);
3490  else
3491  return DFSResult->getILP(A) > DFSResult->getILP(B);
3492  }
3493 };
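// A small worked example of the comparator above: if A and B belong to the
// same subtree and DFSResult reports a smaller ILP value for A than for B,
// then under MaximizeILP operator()(A, B) returns true, so A sorts after B
// and the heap pops B, the higher-ILP node, first.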
3494 
3495 /// Schedule based on the ILP metric.
3496 class ILPScheduler : public MachineSchedStrategy {
3497  ScheduleDAGMILive *DAG = nullptr;
3498  ILPOrder Cmp;
3499 
3500  std::vector<SUnit*> ReadyQ;
3501 
3502 public:
3503  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3504 
3505  void initialize(ScheduleDAGMI *dag) override {
3506  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3507  DAG = static_cast<ScheduleDAGMILive*>(dag);
3508  DAG->computeDFSResult();
3509  Cmp.DFSResult = DAG->getDFSResult();
3510  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3511  ReadyQ.clear();
3512  }
3513 
3514  void registerRoots() override {
3515  // Restore the heap in ReadyQ with the updated DFS results.
3516  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3517  }
3518 
3519  /// Implement MachineSchedStrategy interface.
3520  /// -----------------------------------------
3521 
3522  /// Callback to select the highest priority node from the ready Q.
3523  SUnit *pickNode(bool &IsTopNode) override {
3524  if (ReadyQ.empty()) return nullptr;
3525  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3526  SUnit *SU = ReadyQ.back();
3527  ReadyQ.pop_back();
3528  IsTopNode = false;
3529  LLVM_DEBUG(dbgs() << "Pick node "
3530  << "SU(" << SU->NodeNum << ") "
3531  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3532  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3533  << " @"
3534  << DAG->getDFSResult()->getSubtreeLevel(
3535  DAG->getDFSResult()->getSubtreeID(SU))
3536  << '\n'
3537  << "Scheduling " << *SU->getInstr());
3538  return SU;
3539  }
3540 
3541  /// Scheduler callback to notify that a new subtree is scheduled.
3542  void scheduleTree(unsigned SubtreeID) override {
3543  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3544  }
3545 
3546  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3547  /// DFSResults, and resort the priority Q.
3548  void schedNode(SUnit *SU, bool IsTopNode) override {
3549  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3550  }
3551 
3552  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3553 
3554  void releaseBottomNode(SUnit *SU) override {
3555  ReadyQ.push_back(SU);
3556  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3557  }
3558 };
3559 
3560 } // end anonymous namespace
3561 
 3562 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
 3563  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3564 }
 3565 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
 3566  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3567 }
3568 
 3569 static MachineSchedRegistry ILPMaxRegistry(
 3570  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
 3571 static MachineSchedRegistry ILPMinRegistry(
 3572  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3573 
3574 //===----------------------------------------------------------------------===//
3575 // Machine Instruction Shuffler for Correctness Testing
3576 //===----------------------------------------------------------------------===//
3577 
3578 #ifndef NDEBUG
3579 namespace {
3580 
3581 /// Apply a less-than relation on the node order, which corresponds to the
3582 /// instruction order prior to scheduling. IsReverse implements greater-than.
3583 template<bool IsReverse>
3584 struct SUnitOrder {
3585  bool operator()(SUnit *A, SUnit *B) const {
3586  if (IsReverse)
3587  return A->NodeNum > B->NodeNum;
3588  else
3589  return A->NodeNum < B->NodeNum;
3590  }
3591 };
3592 
3593 /// Reorder instructions as much as possible.
3594 class InstructionShuffler : public MachineSchedStrategy {
3595  bool IsAlternating;
3596  bool IsTopDown;
3597 
3598  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3599  // gives nodes with a higher number higher priority causing the latest
3600  // instructions to be scheduled first.
3601  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3602  TopQ;
3603 
3604  // When scheduling bottom-up, use greater-than as the queue priority.
 3605  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
 3606  BottomQ;
3607 
3608 public:
3609  InstructionShuffler(bool alternate, bool topdown)
3610  : IsAlternating(alternate), IsTopDown(topdown) {}
3611 
3612  void initialize(ScheduleDAGMI*) override {
3613  TopQ.clear();
3614  BottomQ.clear();
3615  }
3616 
3617  /// Implement MachineSchedStrategy interface.
3618  /// -----------------------------------------
3619 
3620  SUnit *pickNode(bool &IsTopNode) override {
3621  SUnit *SU;
3622  if (IsTopDown) {
3623  do {
3624  if (TopQ.empty()) return nullptr;
3625  SU = TopQ.top();
3626  TopQ.pop();
3627  } while (SU->isScheduled);
3628  IsTopNode = true;
3629  } else {
3630  do {
3631  if (BottomQ.empty()) return nullptr;
3632  SU = BottomQ.top();
3633  BottomQ.pop();
3634  } while (SU->isScheduled);
3635  IsTopNode = false;
3636  }
3637  if (IsAlternating)
3638  IsTopDown = !IsTopDown;
3639  return SU;
3640  }
3641 
3642  void schedNode(SUnit *SU, bool IsTopNode) override {}
3643 
3644  void releaseTopNode(SUnit *SU) override {
3645  TopQ.push(SU);
3646  }
3647  void releaseBottomNode(SUnit *SU) override {
3648  BottomQ.push(SU);
3649  }
3650 };
3651 
3652 } // end anonymous namespace
3653 
 3654 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
 3655  bool Alternate = !ForceTopDown && !ForceBottomUp;
3656  bool TopDown = !ForceBottomUp;
3657  assert((TopDown || !ForceTopDown) &&
3658  "-misched-topdown incompatible with -misched-bottomup");
3659  return new ScheduleDAGMILive(
3660  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3661 }
3662 
 3663 static MachineSchedRegistry ShufflerRegistry(
 3664  "shuffle", "Shuffle machine instructions alternating directions",
 3665  createInstructionShuffler);
3666 #endif // !NDEBUG
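// Like the schedulers above, the shuffler is picked up through the name it
// registers with MachineSchedRegistry, so in an assertions-enabled build a
// run along the lines of
//
//   llc -misched=shuffle input.ll
//
// can be used to stress-test whether the recorded DAG dependences alone are
// enough to keep an aggressively reordered schedule correct.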
3667 
3668 //===----------------------------------------------------------------------===//
3669 // GraphWriter support for ScheduleDAGMILive.
3670 //===----------------------------------------------------------------------===//
3671 
3672 #ifndef NDEBUG
3673 namespace llvm {
3674 
3675 template<> struct GraphTraits<
 3676  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
 3677 
3678 template<>
 3679 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
 3680  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 3681 
3682  static std::string getGraphName(const ScheduleDAG *G) {
3683  return G->MF.getName();
3684  }
3685 
3686  static bool renderGraphFromBottomUp() {
3687  return true;
3688  }
3689 
3690  static bool isNodeHidden(const SUnit *Node) {
3691  if (ViewMISchedCutoff == 0)
3692  return false;
3693  return (Node->Preds.size() > ViewMISchedCutoff
3694  || Node->Succs.size() > ViewMISchedCutoff);
3695  }
3696 
3697  /// If you want to override the dot attributes printed for a particular
3698  /// edge, override this method.
3699  static std::string getEdgeAttributes(const SUnit *Node,
3700  SUnitIterator EI,
3701  const ScheduleDAG *Graph) {
3702  if (EI.isArtificialDep())
3703  return "color=cyan,style=dashed";
3704  if (EI.isCtrlDep())
3705  return "color=blue,style=dashed";
3706  return "";
3707  }
3708 
3709  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3710  std::string Str;
3711  raw_string_ostream SS(Str);
3712  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3713  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3714  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3715  SS << "SU:" << SU->NodeNum;
3716  if (DFS)
3717  SS << " I:" << DFS->getNumInstrs(SU);
3718  return SS.str();
3719  }
3720 
3721  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3722  return G->getGraphNodeLabel(SU);
3723  }
3724 
3725  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3726  std::string Str("shape=Mrecord");
3727  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3728  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3729  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3730  if (DFS) {
3731  Str += ",style=filled,fillcolor=\"#";
3732  Str += DOT::getColorString(DFS->getSubtreeID(N));
3733  Str += '"';
3734  }
3735  return Str;
3736  }
3737 };
3738 
3739 } // end namespace llvm
3740 #endif // NDEBUG
3741 
3742 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3743 /// rendered using 'dot'.
3744 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3745 #ifndef NDEBUG
3746  ViewGraph(this, Name, false, Title);
3747 #else
3748  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3749  << "systems with Graphviz or gv!\n";
3750 #endif // NDEBUG
3751 }
3752 
3753 /// Out-of-line implementation with no arguments is handy for gdb.
3755  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3756 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:118
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:77
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:476
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Segments::iterator iterator
Definition: LiveInterval.h:207
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:675
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:139
bool test(unsigned Idx) const
Definition: BitVector.h:501
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:449
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:718
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:366
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const RegPressureTracker & getBotRPTracker() const
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:269
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:211
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Result of a LiveRange query.
Definition: LiveInterval.h:89
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
PressureDiff & getPressureDiff(const SUnit *SU)
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:314
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:271
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:268
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:446
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:104
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:528
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:838
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:379
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Helpers for implementing custom MachineSchedStrategy classes.
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
static const unsigned MinSubtreeSize
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:393
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:499
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
LiveInterval & getInterval(Register Reg)
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:519
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:270
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles)
Compute the next cycle at which the given processor resource unit can be scheduled.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::pair< unsigned, unsigned > getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:416
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:140
Representation of each machine instruction.
Definition: MachineInstr.h:64
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:652
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
iterator begin()
Definition: LiveInterval.h:210
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:825
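As a sketch, a DAG walk that keeps only instructions that may touch memory, the usual precondition for memop clustering; DAG is assumed to be a ScheduleDAGInstrs already in scope:

  for (SUnit &SU : DAG->SUnits) {
    MachineInstr &MI = *SU.getInstr();
    if (!MI.mayLoad() && !MI.mayStore())
      continue;                 // not a memory operation, skip it
    // ... consider SU as a clustering candidate ...
  }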
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:372
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:505
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool getMemOperandWithOffset(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
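A hedged sketch of how a DAG mutation queries the base operand and byte offset of a memory access; TII, TRI, and MI are assumed to already be in scope:

  const MachineOperand *BaseOp = nullptr;
  int64_t Offset = 0;
  if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) {
    // Two accesses sharing BaseOp with nearby Offsets are candidates for a
    // "cluster" edge that keeps them adjacent in the final schedule.
  }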
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants to be adjacent.
Definition: ScheduleDAG.h:206
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits methods.
Machine Instruction Scheduler
IRTranslator LLVM IR MI
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false)
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register getReg() const
getReg - Returns the register number.
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register pressure set over another.
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
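A sketch of a target-side factory that installs an extra post-processing step via addMutation; createLoadClusterDAGMutation is declared in llvm/CodeGen/MachineScheduler.h, while createMyTargetSched is hypothetical:

  static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
    ScheduleDAGMILive *DAG =
        new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
    // The mutation runs after the DAG is built but before scheduling starts.
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }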
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
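An illustrative comparator in the spirit of the ILP schedulers, ordering nodes by their ILPValue; it assumes the SchedDFSResult has already been computed for the current DAG:

  bool preferForILP(const SchedDFSResult &DFS, const SUnit *A, const SUnit *B) {
    // Higher ILP (deeper, denser subtree) wins.
    return DFS.getILP(B) < DFS.getILP(A);
  }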
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:655
void resize(size_type N)
Definition: SmallVector.h:344