LLVM  9.0.0svn
MachineScheduler.cpp
Go to the documentation of this file.
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph, provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs after shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
197 
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
217  AU.addRequired<SlotIndexes>();
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
227 
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
241 }
242 
245 
246 /// A dummy default scheduler factory indicates whether the scheduler
247 /// is overridden on the command line.
249  return nullptr;
250 }
251 
252 /// MachineSchedOpt allows command line selection of the scheduler.
255 MachineSchedOpt("misched",
257  cl::desc("Machine instruction scheduler to use"));
258 
260 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
262 
264  "enable-misched",
265  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
266  cl::Hidden);
267 
269  "enable-post-misched",
270  cl::desc("Enable the post-ra machine instruction scheduling pass."),
271  cl::init(true), cl::Hidden);
272 
273 /// Decrement this iterator until reaching the top or a non-debug instr.
277  assert(I != Beg && "reached the top of the region, cannot decrement");
278  while (--I != Beg) {
279  if (!I->isDebugInstr())
280  break;
281  }
282  return I;
283 }
284 
285 /// Non-const version.
291 }
292 
293 /// If this iterator is a debug value, increment until reaching the End or a
294 /// non-debug instruction.
298  for(; I != End; ++I) {
299  if (!I->isDebugInstr())
300  break;
301  }
302  return I;
303 }
304 
305 /// Non-const version.
311 }
312 
313 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
314 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
315  // Select the scheduler, or set the default.
316  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
317  if (Ctor != useDefaultMachineSched)
318  return Ctor(this);
319 
320  // Get the default scheduler set by the target for this function.
321  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
322  if (Scheduler)
323  return Scheduler;
324 
325  // Default to GenericScheduler.
326  return createGenericSchedLive(this);
327 }
328 
329 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
330 /// the caller. We don't have a command line option to override the postRA
331 /// scheduler. The Target must configure it.
332 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
333  // Get the postRA scheduler set by the target for this function.
334  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
335  if (Scheduler)
336  return Scheduler;
337 
338  // Default to GenericScheduler.
339  return createGenericSchedPostRA(this);
340 }
341 
342 /// Top-level MachineScheduler pass driver.
343 ///
344 /// Visit blocks in function order. Divide each block into scheduling regions
345 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
346 /// consistent with the DAG builder, which traverses the interior of the
347 /// scheduling regions bottom-up.
348 ///
349 /// This design avoids exposing scheduling boundaries to the DAG builder,
350 /// simplifying the DAG builder's support for "special" target instructions.
351 /// At the same time the design allows target schedulers to operate across
352 /// scheduling boundaries, for example to bundle the boundary instructions
353 /// without reordering them. This creates complexity, because the target
354 /// scheduler must update the RegionBegin and RegionEnd positions cached by
355 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
356 /// design would be to split blocks at scheduling boundaries, but LLVM has a
357 /// general bias against block splitting purely for implementation simplicity.
358 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
359  if (skipFunction(mf.getFunction()))
360  return false;
361 
362  if (EnableMachineSched.getNumOccurrences()) {
363  if (!EnableMachineSched)
364  return false;
365  } else if (!mf.getSubtarget().enableMachineScheduler())
366  return false;
367 
368  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
369 
370  // Initialize the context of the pass.
371  MF = &mf;
372  MLI = &getAnalysis<MachineLoopInfo>();
373  MDT = &getAnalysis<MachineDominatorTree>();
374  PassConfig = &getAnalysis<TargetPassConfig>();
375  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
376 
377  LIS = &getAnalysis<LiveIntervals>();
378 
379  if (VerifyScheduling) {
380  LLVM_DEBUG(LIS->dump());
381  MF->verify(this, "Before machine scheduling.");
382  }
383  RegClassInfo->runOnMachineFunction(*MF);
384 
385  // Instantiate the selected scheduler for this target, function, and
386  // optimization level.
387  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
388  scheduleRegions(*Scheduler, false);
389 
390  LLVM_DEBUG(LIS->dump());
391  if (VerifyScheduling)
392  MF->verify(this, "After machine scheduling.");
393  return true;
394 }
395 
396 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
397  if (skipFunction(mf.getFunction()))
398  return false;
399 
400  if (EnablePostRAMachineSched.getNumOccurrences()) {
401  if (!EnablePostRAMachineSched)
402  return false;
403  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
404  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
405  return false;
406  }
407  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
408 
409  // Initialize the context of the pass.
410  MF = &mf;
411  MLI = &getAnalysis<MachineLoopInfo>();
412  PassConfig = &getAnalysis<TargetPassConfig>();
413 
414  if (VerifyScheduling)
415  MF->verify(this, "Before post machine scheduling.");
416 
417  // Instantiate the selected scheduler for this target, function, and
418  // optimization level.
419  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
420  scheduleRegions(*Scheduler, true);
421 
422  if (VerifyScheduling)
423  MF->verify(this, "After post machine scheduling.");
424  return true;
425 }
426 
427 /// Return true of the given instruction should not be included in a scheduling
428 /// region.
429 ///
430 /// MachineScheduler does not currently support scheduling across calls. To
431 /// handle calls, the DAG builder needs to be modified to create register
432 /// anti/output dependencies on the registers clobbered by the call's regmask
433 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
434 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
435 /// the boundary, but there would be no benefit to postRA scheduling across
436 /// calls this late anyway.
438  MachineBasicBlock *MBB,
439  MachineFunction *MF,
440  const TargetInstrInfo *TII) {
441  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
442 }
443 
444 /// A region of an MBB for scheduling.
445 namespace {
446 struct SchedRegion {
447  /// RegionBegin is the first instruction in the scheduling region, and
448  /// RegionEnd is either MBB->end() or the scheduling boundary after the
449  /// last instruction in the scheduling region. These iterators cannot refer
450  /// to instructions outside of the identified scheduling region because
451  /// those may be reordered before scheduling this region.
452  MachineBasicBlock::iterator RegionBegin;
453  MachineBasicBlock::iterator RegionEnd;
454  unsigned NumRegionInstrs;
455 
457  unsigned N) :
458  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
459 };
460 } // end anonymous namespace
461 
463 
464 static void
466  MBBRegionsVector &Regions,
467  bool RegionsTopDown) {
468  MachineFunction *MF = MBB->getParent();
469  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
470 
471  MachineBasicBlock::iterator I = nullptr;
472  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
473  RegionEnd != MBB->begin(); RegionEnd = I) {
474 
475  // Avoid decrementing RegionEnd for blocks with no terminator.
476  if (RegionEnd != MBB->end() ||
477  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
478  --RegionEnd;
479  }
480 
481  // The next region starts above the previous region. Look backward in the
482  // instruction stream until we find the nearest boundary.
483  unsigned NumRegionInstrs = 0;
484  I = RegionEnd;
485  for (;I != MBB->begin(); --I) {
486  MachineInstr &MI = *std::prev(I);
487  if (isSchedBoundary(&MI, &*MBB, MF, TII))
488  break;
489  if (!MI.isDebugInstr())
490  // MBB::size() uses instr_iterator to count. Here we need a bundle to
491  // count as a single instruction.
492  ++NumRegionInstrs;
493  }
494 
495  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
496  }
497 
498  if (RegionsTopDown)
499  std::reverse(Regions.begin(), Regions.end());
500 }
501 
502 /// Main driver for both MachineScheduler and PostMachineScheduler.
503 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
504  bool FixKillFlags) {
505  // Visit all machine basic blocks.
506  //
507  // TODO: Visit blocks in global postorder or postorder within the bottom-up
508  // loop tree. Then we can optionally compute global RegPressure.
509  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
510  MBB != MBBEnd; ++MBB) {
511 
512  Scheduler.startBlock(&*MBB);
513 
514 #ifndef NDEBUG
515  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
516  continue;
517  if (SchedOnlyBlock.getNumOccurrences()
518  && (int)SchedOnlyBlock != MBB->getNumber())
519  continue;
520 #endif
521 
522  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
523  // points to the scheduling boundary at the bottom of the region. The DAG
524  // does not include RegionEnd, but the region does (i.e. the next
525  // RegionEnd is above the previous RegionBegin). If the current block has
526  // no terminator then RegionEnd == MBB->end() for the bottom region.
527  //
528  // All the regions of MBB are first found and stored in MBBRegions, which
529  // will be processed (MBB) top-down if initialized with true.
530  //
531  // The Scheduler may insert instructions during either schedule() or
532  // exitRegion(), even for empty regions. So the local iterators 'I' and
533  // 'RegionEnd' are invalid across these calls. Instructions must not be
534  // added to other regions than the current one without updating MBBRegions.
535 
536  MBBRegionsVector MBBRegions;
537  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
538  for (MBBRegionsVector::iterator R = MBBRegions.begin();
539  R != MBBRegions.end(); ++R) {
540  MachineBasicBlock::iterator I = R->RegionBegin;
541  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
542  unsigned NumRegionInstrs = R->NumRegionInstrs;
543 
544  // Notify the scheduler of the region, even if we may skip scheduling
545  // it. Perhaps it still needs to be bundled.
546  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
547 
548  // Skip empty scheduling regions (0 or 1 schedulable instructions).
549  if (I == RegionEnd || I == std::prev(RegionEnd)) {
550  // Close the current region. Bundle the terminator if needed.
551  // This invalidates 'RegionEnd' and 'I'.
552  Scheduler.exitRegion();
553  continue;
554  }
555  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
556  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
557  << " " << MBB->getName() << "\n From: " << *I
558  << " To: ";
559  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
560  else dbgs() << "End";
561  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
563  errs() << MF->getName();
564  errs() << ":%bb. " << MBB->getNumber();
565  errs() << " " << MBB->getName() << " \n";
566  }
567 
568  // Schedule a region: possibly reorder instructions.
569  // This invalidates the original region iterators.
570  Scheduler.schedule();
571 
572  // Close the current region.
573  Scheduler.exitRegion();
574  }
575  Scheduler.finishBlock();
576  // FIXME: Ideally, no further passes should rely on kill flags. However,
577  // thumb2 size reduction is currently an exception, so the PostMIScheduler
578  // needs to do this.
579  if (FixKillFlags)
580  Scheduler.fixupKills(*MBB);
581  }
582  Scheduler.finalizeSchedule();
583 }
584 
585 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
586  // unimplemented
587 }
588 
589 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
591  dbgs() << "Queue " << Name << ": ";
592  for (const SUnit *SU : Queue)
593  dbgs() << SU->NodeNum << " ";
594  dbgs() << "\n";
595 }
596 #endif
597 
598 //===----------------------------------------------------------------------===//
599 // ScheduleDAGMI - Basic machine instruction scheduling. This is
600 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
601 // virtual registers.
602 // ===----------------------------------------------------------------------===/
603 
604 // Provide a vtable anchor.
606 
607 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
608  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
609 }
610 
611 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
612  if (SuccSU != &ExitSU) {
613  // Do not use WillCreateCycle, it assumes SD scheduling.
614  // If Pred is reachable from Succ, then the edge creates a cycle.
615  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
616  return false;
617  Topo.AddPred(SuccSU, PredDep.getSUnit());
618  }
619  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
620  // Return true regardless of whether a new edge needed to be inserted.
621  return true;
622 }
623 
624 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
625 /// NumPredsLeft reaches zero, release the successor node.
626 ///
627 /// FIXME: Adjust SuccSU height based on MinLatency.
628 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
629  SUnit *SuccSU = SuccEdge->getSUnit();
630 
631  if (SuccEdge->isWeak()) {
632  --SuccSU->WeakPredsLeft;
633  if (SuccEdge->isCluster())
634  NextClusterSucc = SuccSU;
635  return;
636  }
637 #ifndef NDEBUG
638  if (SuccSU->NumPredsLeft == 0) {
639  dbgs() << "*** Scheduling failed! ***\n";
640  dumpNode(*SuccSU);
641  dbgs() << " has been released too many times!\n";
642  llvm_unreachable(nullptr);
643  }
644 #endif
645  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
646  // CurrCycle may have advanced since then.
647  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
648  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
649 
650  --SuccSU->NumPredsLeft;
651  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
652  SchedImpl->releaseTopNode(SuccSU);
653 }
654 
655 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
657  for (SDep &Succ : SU->Succs)
658  releaseSucc(SU, &Succ);
659 }
660 
661 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
662 /// NumSuccsLeft reaches zero, release the predecessor node.
663 ///
664 /// FIXME: Adjust PredSU height based on MinLatency.
665 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
666  SUnit *PredSU = PredEdge->getSUnit();
667 
668  if (PredEdge->isWeak()) {
669  --PredSU->WeakSuccsLeft;
670  if (PredEdge->isCluster())
671  NextClusterPred = PredSU;
672  return;
673  }
674 #ifndef NDEBUG
675  if (PredSU->NumSuccsLeft == 0) {
676  dbgs() << "*** Scheduling failed! ***\n";
677  dumpNode(*PredSU);
678  dbgs() << " has been released too many times!\n";
679  llvm_unreachable(nullptr);
680  }
681 #endif
682  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
683  // CurrCycle may have advanced since then.
684  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
685  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
686 
687  --PredSU->NumSuccsLeft;
688  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
689  SchedImpl->releaseBottomNode(PredSU);
690 }
691 
692 /// releasePredecessors - Call releasePred on each of SU's predecessors.
694  for (SDep &Pred : SU->Preds)
695  releasePred(SU, &Pred);
696 }
697 
700  SchedImpl->enterMBB(bb);
701 }
702 
704  SchedImpl->leaveMBB();
706 }
707 
708 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
709 /// crossing a scheduling boundary. [begin, end) includes all instructions in
710 /// the region, including the boundary itself and single-instruction regions
711 /// that don't get scheduled.
715  unsigned regioninstrs)
716 {
717  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
718 
719  SchedImpl->initPolicy(begin, end, regioninstrs);
720 }
721 
722 /// This is normally called from the main scheduler loop but may also be invoked
723 /// by the scheduling strategy to perform additional code motion.
726  // Advance RegionBegin if the first instruction moves down.
727  if (&*RegionBegin == MI)
728  ++RegionBegin;
729 
730  // Update the instruction stream.
731  BB->splice(InsertPos, BB, MI);
732 
733  // Update LiveIntervals
734  if (LIS)
735  LIS->handleMove(*MI, /*UpdateFlags=*/true);
736 
737  // Recede RegionBegin if an instruction moves above the first.
738  if (RegionBegin == InsertPos)
739  RegionBegin = MI;
740 }
741 
743 #ifndef NDEBUG
744  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
745  CurrentTop = CurrentBottom;
746  return false;
747  }
748  ++NumInstrsScheduled;
749 #endif
750  return true;
751 }
752 
753 /// Per-region scheduling driver, called back from
754 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
755 /// does not consider liveness or register pressure. It is useful for PostRA
756 /// scheduling and potentially other custom schedulers.
758  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
759  LLVM_DEBUG(SchedImpl->dumpPolicy());
760 
761  // Build the DAG.
762  buildSchedGraph(AA);
763 
764  Topo.InitDAGTopologicalSorting();
765 
766  postprocessDAG();
767 
768  SmallVector<SUnit*, 8> TopRoots, BotRoots;
769  findRootsAndBiasEdges(TopRoots, BotRoots);
770 
771  LLVM_DEBUG(dump());
772  if (PrintDAGs) dump();
773  if (ViewMISchedDAGs) viewGraph();
774 
775  // Initialize the strategy before modifying the DAG.
776  // This may initialize a DFSResult to be used for queue priority.
777  SchedImpl->initialize(this);
778 
779  // Initialize ready queues now that the DAG and priority data are finalized.
780  initQueues(TopRoots, BotRoots);
781 
782  bool IsTopNode = false;
783  while (true) {
784  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
785  SUnit *SU = SchedImpl->pickNode(IsTopNode);
786  if (!SU) break;
787 
788  assert(!SU->isScheduled && "Node already scheduled");
789  if (!checkSchedLimit())
790  break;
791 
792  MachineInstr *MI = SU->getInstr();
793  if (IsTopNode) {
794  assert(SU->isTopReady() && "node still has unscheduled dependencies");
795  if (&*CurrentTop == MI)
796  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
797  else
798  moveInstruction(MI, CurrentTop);
799  } else {
800  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
802  priorNonDebug(CurrentBottom, CurrentTop);
803  if (&*priorII == MI)
804  CurrentBottom = priorII;
805  else {
806  if (&*CurrentTop == MI)
807  CurrentTop = nextIfDebug(++CurrentTop, priorII);
808  moveInstruction(MI, CurrentBottom);
809  CurrentBottom = MI;
810  }
811  }
812  // Notify the scheduling strategy before updating the DAG.
813  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
814  // runs, it can then use the accurate ReadyCycle time to determine whether
815  // newly released nodes can move to the readyQ.
816  SchedImpl->schedNode(SU, IsTopNode);
817 
818  updateQueues(SU, IsTopNode);
819  }
820  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
821 
822  placeDebugValues();
823 
824  LLVM_DEBUG({
825  dbgs() << "*** Final schedule for "
826  << printMBBReference(*begin()->getParent()) << " ***\n";
827  dumpSchedule();
828  dbgs() << '\n';
829  });
830 }
831 
832 /// Apply each ScheduleDAGMutation step in order.
834  for (auto &m : Mutations)
835  m->apply(this);
836 }
837 
838 void ScheduleDAGMI::
840  SmallVectorImpl<SUnit*> &BotRoots) {
841  for (SUnit &SU : SUnits) {
842  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
843 
844  // Order predecessors so DFSResult follows the critical path.
845  SU.biasCriticalPath();
846 
847  // A SUnit is ready to top schedule if it has no predecessors.
848  if (!SU.NumPredsLeft)
849  TopRoots.push_back(&SU);
850  // A SUnit is ready to bottom schedule if it has no successors.
851  if (!SU.NumSuccsLeft)
852  BotRoots.push_back(&SU);
853  }
854  ExitSU.biasCriticalPath();
855 }
856 
857 /// Identify DAG roots and setup scheduler queues.
859  ArrayRef<SUnit*> BotRoots) {
860  NextClusterSucc = nullptr;
861  NextClusterPred = nullptr;
862 
863  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
864  //
865  // Nodes with unreleased weak edges can still be roots.
866  // Release top roots in forward order.
867  for (SUnit *SU : TopRoots)
868  SchedImpl->releaseTopNode(SU);
869 
870  // Release bottom roots in reverse order so the higher priority nodes appear
871  // first. This is more natural and slightly more efficient.
873  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
874  SchedImpl->releaseBottomNode(*I);
875  }
876 
877  releaseSuccessors(&EntrySU);
878  releasePredecessors(&ExitSU);
879 
880  SchedImpl->registerRoots();
881 
882  // Advance past initial DebugValues.
883  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
884  CurrentBottom = RegionEnd;
885 }
886 
887 /// Update scheduler queues after scheduling an instruction.
888 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
889  // Release dependent instructions for scheduling.
890  if (IsTopNode)
891  releaseSuccessors(SU);
892  else
893  releasePredecessors(SU);
894 
895  SU->isScheduled = true;
896 }
897 
898 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
900  // If first instruction was a DBG_VALUE then put it back.
901  if (FirstDbgValue) {
902  BB->splice(RegionBegin, BB, FirstDbgValue);
903  RegionBegin = FirstDbgValue;
904  }
905 
906  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
907  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
908  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
909  MachineInstr *DbgValue = P.first;
910  MachineBasicBlock::iterator OrigPrevMI = P.second;
911  if (&*RegionBegin == DbgValue)
912  ++RegionBegin;
913  BB->splice(++OrigPrevMI, BB, DbgValue);
914  if (OrigPrevMI == std::prev(RegionEnd))
915  RegionEnd = DbgValue;
916  }
917  DbgValues.clear();
918  FirstDbgValue = nullptr;
919 }
920 
921 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
923  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
924  if (SUnit *SU = getSUnit(&(*MI)))
925  dumpNode(*SU);
926  else
927  dbgs() << "Missing SUnit\n";
928  }
929 }
930 #endif
931 
932 //===----------------------------------------------------------------------===//
933 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
934 // preservation.
935 //===----------------------------------------------------------------------===//
936 
938  delete DFSResult;
939 }
940 
942  const MachineInstr &MI = *SU.getInstr();
943  for (const MachineOperand &MO : MI.operands()) {
944  if (!MO.isReg())
945  continue;
946  if (!MO.readsReg())
947  continue;
948  if (TrackLaneMasks && !MO.isUse())
949  continue;
950 
951  unsigned Reg = MO.getReg();
953  continue;
954 
955  // Ignore re-defs.
956  if (TrackLaneMasks) {
957  bool FoundDef = false;
958  for (const MachineOperand &MO2 : MI.operands()) {
959  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
960  FoundDef = true;
961  break;
962  }
963  }
964  if (FoundDef)
965  continue;
966  }
967 
968  // Record this local VReg use.
969  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
970  for (; UI != VRegUses.end(); ++UI) {
971  if (UI->SU == &SU)
972  break;
973  }
974  if (UI == VRegUses.end())
975  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
976  }
977 }
978 
979 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
980 /// crossing a scheduling boundary. [begin, end) includes all instructions in
981 /// the region, including the boundary itself and single-instruction regions
982 /// that don't get scheduled.
986  unsigned regioninstrs)
987 {
988  // ScheduleDAGMI initializes SchedImpl's per-region policy.
989  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
990 
991  // For convenience remember the end of the liveness region.
992  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
993 
994  SUPressureDiffs.clear();
995 
996  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
997  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
998 
999  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1000  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1001 }
1002 
1003 // Setup the register pressure trackers for the top scheduled top and bottom
1004 // scheduled regions.
1006  VRegUses.clear();
1007  VRegUses.setUniverse(MRI.getNumVirtRegs());
1008  for (SUnit &SU : SUnits)
1009  collectVRegUses(SU);
1010 
1011  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1012  ShouldTrackLaneMasks, false);
1013  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1014  ShouldTrackLaneMasks, false);
1015 
1016  // Close the RPTracker to finalize live ins.
1017  RPTracker.closeRegion();
1018 
1019  LLVM_DEBUG(RPTracker.dump());
1020 
1021  // Initialize the live ins and live outs.
1022  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1023  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1024 
1025  // Close one end of the tracker so we can call
1026  // getMaxUpward/DownwardPressureDelta before advancing across any
1027  // instructions. This converts currently live regs into live ins/outs.
1028  TopRPTracker.closeTop();
1029  BotRPTracker.closeBottom();
1030 
1031  BotRPTracker.initLiveThru(RPTracker);
1032  if (!BotRPTracker.getLiveThru().empty()) {
1033  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1034  LLVM_DEBUG(dbgs() << "Live Thru: ";
1035  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1036  };
1037 
1038  // For each live out vreg reduce the pressure change associated with other
1039  // uses of the same vreg below the live-out reaching def.
1040  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1041 
1042  // Account for liveness generated by the region boundary.
1043  if (LiveRegionEnd != RegionEnd) {
1045  BotRPTracker.recede(&LiveUses);
1046  updatePressureDiffs(LiveUses);
1047  }
1048 
1049  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1050  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1051  dbgs() << "Bottom Pressure:\n";
1052  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1053 
1054  assert((BotRPTracker.getPos() == RegionEnd ||
1055  (RegionEnd->isDebugInstr() &&
1056  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1057  "Can't find the region bottom");
1058 
1059  // Cache the list of excess pressure sets in this region. This will also track
1060  // the max pressure in the scheduled code for these sets.
1061  RegionCriticalPSets.clear();
1062  const std::vector<unsigned> &RegionPressure =
1063  RPTracker.getPressure().MaxSetPressure;
1064  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1065  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1066  if (RegionPressure[i] > Limit) {
1067  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1068  << " Actual " << RegionPressure[i] << "\n");
1069  RegionCriticalPSets.push_back(PressureChange(i));
1070  }
1071  }
1072  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1073  for (const PressureChange &RCPS
1074  : RegionCriticalPSets) dbgs()
1075  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1076  dbgs() << "\n");
1077 }
1078 
1081  const std::vector<unsigned> &NewMaxPressure) {
1082  const PressureDiff &PDiff = getPressureDiff(SU);
1083  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1084  for (const PressureChange &PC : PDiff) {
1085  if (!PC.isValid())
1086  break;
1087  unsigned ID = PC.getPSet();
1088  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1089  ++CritIdx;
1090  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1091  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1092  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1093  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1094  }
1095  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1096  if (NewMaxPressure[ID] >= Limit - 2) {
1097  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1098  << NewMaxPressure[ID]
1099  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1100  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1101  << " livethru)\n");
1102  }
1103  }
1104 }
1105 
1106 /// Update the PressureDiff array for liveness after scheduling this
1107 /// instruction.
1109  ArrayRef<RegisterMaskPair> LiveUses) {
1110  for (const RegisterMaskPair &P : LiveUses) {
1111  unsigned Reg = P.RegUnit;
1112  /// FIXME: Currently assuming single-use physregs.
1113  if (!TRI->isVirtualRegister(Reg))
1114  continue;
1115 
1116  if (ShouldTrackLaneMasks) {
1117  // If the register has just become live then other uses won't change
1118  // this fact anymore => decrement pressure.
1119  // If the register has just become dead then other uses make it come
1120  // back to life => increment pressure.
1121  bool Decrement = P.LaneMask.any();
1122 
1123  for (const VReg2SUnit &V2SU
1124  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1125  SUnit &SU = *V2SU.SU;
1126  if (SU.isScheduled || &SU == &ExitSU)
1127  continue;
1128 
1129  PressureDiff &PDiff = getPressureDiff(&SU);
1130  PDiff.addPressureChange(Reg, Decrement, &MRI);
1131  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1132  << printReg(Reg, TRI) << ':'
1133  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1134  dbgs() << " to "; PDiff.dump(*TRI););
1135  }
1136  } else {
1137  assert(P.LaneMask.any());
1138  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1139  // This may be called before CurrentBottom has been initialized. However,
1140  // BotRPTracker must have a valid position. We want the value live into the
1141  // instruction or live out of the block, so ask for the previous
1142  // instruction's live-out.
1143  const LiveInterval &LI = LIS->getInterval(Reg);
1144  VNInfo *VNI;
1146  nextIfDebug(BotRPTracker.getPos(), BB->end());
1147  if (I == BB->end())
1148  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1149  else {
1150  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1151  VNI = LRQ.valueIn();
1152  }
1153  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1154  assert(VNI && "No live value at use.");
1155  for (const VReg2SUnit &V2SU
1156  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1157  SUnit *SU = V2SU.SU;
1158  // If this use comes before the reaching def, it cannot be a last use,
1159  // so decrease its pressure change.
1160  if (!SU->isScheduled && SU != &ExitSU) {
1161  LiveQueryResult LRQ =
1162  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1163  if (LRQ.valueIn() == VNI) {
1164  PressureDiff &PDiff = getPressureDiff(SU);
1165  PDiff.addPressureChange(Reg, true, &MRI);
1166  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1167  << *SU->getInstr();
1168  dbgs() << " to "; PDiff.dump(*TRI););
1169  }
1170  }
1171  }
1172  }
1173  }
1174 }
1175 
1177 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1178  if (EntrySU.getInstr() != nullptr)
1179  dumpNodeAll(EntrySU);
1180  for (const SUnit &SU : SUnits) {
1181  dumpNodeAll(SU);
1182  if (ShouldTrackPressure) {
1183  dbgs() << " Pressure Diff : ";
1184  getPressureDiff(&SU).dump(*TRI);
1185  }
1186  dbgs() << " Single Issue : ";
1187  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1188  SchedModel.mustEndGroup(SU.getInstr()))
1189  dbgs() << "true;";
1190  else
1191  dbgs() << "false;";
1192  dbgs() << '\n';
1193  }
1194  if (ExitSU.getInstr() != nullptr)
1195  dumpNodeAll(ExitSU);
1196 #endif
1197 }
1198 
1199 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1200 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1201 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1202 ///
1203 /// This is a skeletal driver, with all the functionality pushed into helpers,
1204 /// so that it can be easily extended by experimental schedulers. Generally,
1205 /// implementing MachineSchedStrategy should be sufficient to implement a new
1206 /// scheduling algorithm. However, if a scheduler further subclasses
1207 /// ScheduleDAGMILive then it will want to override this virtual method in order
1208 /// to update any specialized state.
1210  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1211  LLVM_DEBUG(SchedImpl->dumpPolicy());
1212  buildDAGWithRegPressure();
1213 
1214  Topo.InitDAGTopologicalSorting();
1215 
1216  postprocessDAG();
1217 
1218  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1219  findRootsAndBiasEdges(TopRoots, BotRoots);
1220 
1221  // Initialize the strategy before modifying the DAG.
1222  // This may initialize a DFSResult to be used for queue priority.
1223  SchedImpl->initialize(this);
1224 
1225  LLVM_DEBUG(dump());
1226  if (PrintDAGs) dump();
1227  if (ViewMISchedDAGs) viewGraph();
1228 
1229  // Initialize ready queues now that the DAG and priority data are finalized.
1230  initQueues(TopRoots, BotRoots);
1231 
1232  bool IsTopNode = false;
1233  while (true) {
1234  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1235  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1236  if (!SU) break;
1237 
1238  assert(!SU->isScheduled && "Node already scheduled");
1239  if (!checkSchedLimit())
1240  break;
1241 
1242  scheduleMI(SU, IsTopNode);
1243 
1244  if (DFSResult) {
1245  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1246  if (!ScheduledTrees.test(SubtreeID)) {
1247  ScheduledTrees.set(SubtreeID);
1248  DFSResult->scheduleTree(SubtreeID);
1249  SchedImpl->scheduleTree(SubtreeID);
1250  }
1251  }
1252 
1253  // Notify the scheduling strategy after updating the DAG.
1254  SchedImpl->schedNode(SU, IsTopNode);
1255 
1256  updateQueues(SU, IsTopNode);
1257  }
1258  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1259 
1260  placeDebugValues();
1261 
1262  LLVM_DEBUG({
1263  dbgs() << "*** Final schedule for "
1264  << printMBBReference(*begin()->getParent()) << " ***\n";
1265  dumpSchedule();
1266  dbgs() << '\n';
1267  });
1268 }
1269 
1270 /// Build the DAG and setup three register pressure trackers.
1272  if (!ShouldTrackPressure) {
1273  RPTracker.reset();
1274  RegionCriticalPSets.clear();
1275  buildSchedGraph(AA);
1276  return;
1277  }
1278 
1279  // Initialize the register pressure tracker used by buildSchedGraph.
1280  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1281  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1282 
1283  // Account for liveness generate by the region boundary.
1284  if (LiveRegionEnd != RegionEnd)
1285  RPTracker.recede();
1286 
1287  // Build the DAG, and compute current register pressure.
1288  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1289 
1290  // Initialize top/bottom trackers after computing region pressure.
1291  initRegPressure();
1292 }
1293 
1295  if (!DFSResult)
1296  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1297  DFSResult->clear();
1298  ScheduledTrees.clear();
1299  DFSResult->resize(SUnits.size());
1300  DFSResult->compute(SUnits);
1301  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1302 }
1303 
1304 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1305 /// only provides the critical path for single block loops. To handle loops that
1306 /// span blocks, we could use the vreg path latencies provided by
1307 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1308 /// available for use in the scheduler.
1309 ///
1310 /// The cyclic path estimation identifies a def-use pair that crosses the back
1311 /// edge and considers the depth and height of the nodes. For example, consider
1312 /// the following instruction sequence where each instruction has unit latency
1313 /// and defines an epomymous virtual register:
1314 ///
1315 /// a->b(a,c)->c(b)->d(c)->exit
1316 ///
1317 /// The cyclic critical path is a two cycles: b->c->b
1318 /// The acyclic critical path is four cycles: a->b->c->d->exit
1319 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1320 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1321 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1322 /// LiveInDepth = depth(b) = len(a->b) = 1
1323 ///
1324 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1325 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1326 /// CyclicCriticalPath = min(2, 2) = 2
1327 ///
1328 /// This could be relevant to PostRA scheduling, but is currently implemented
1329 /// assuming LiveIntervals.
1331  // This only applies to single block loop.
1332  if (!BB->isSuccessor(BB))
1333  return 0;
1334 
1335  unsigned MaxCyclicLatency = 0;
1336  // Visit each live out vreg def to find def/use pairs that cross iterations.
1337  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1338  unsigned Reg = P.RegUnit;
1339  if (!TRI->isVirtualRegister(Reg))
1340  continue;
1341  const LiveInterval &LI = LIS->getInterval(Reg);
1342  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1343  if (!DefVNI)
1344  continue;
1345 
1346  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1347  const SUnit *DefSU = getSUnit(DefMI);
1348  if (!DefSU)
1349  continue;
1350 
1351  unsigned LiveOutHeight = DefSU->getHeight();
1352  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1353  // Visit all local users of the vreg def.
1354  for (const VReg2SUnit &V2SU
1355  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1356  SUnit *SU = V2SU.SU;
1357  if (SU == &ExitSU)
1358  continue;
1359 
1360  // Only consider uses of the phi.
1361  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1362  if (!LRQ.valueIn()->isPHIDef())
1363  continue;
1364 
1365  // Assume that a path spanning two iterations is a cycle, which could
1366  // overestimate in strange cases. This allows cyclic latency to be
1367  // estimated as the minimum slack of the vreg's depth or height.
1368  unsigned CyclicLatency = 0;
1369  if (LiveOutDepth > SU->getDepth())
1370  CyclicLatency = LiveOutDepth - SU->getDepth();
1371 
1372  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1373  if (LiveInHeight > LiveOutHeight) {
1374  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1375  CyclicLatency = LiveInHeight - LiveOutHeight;
1376  } else
1377  CyclicLatency = 0;
1378 
1379  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1380  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1381  if (CyclicLatency > MaxCyclicLatency)
1382  MaxCyclicLatency = CyclicLatency;
1383  }
1384  }
1385  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1386  return MaxCyclicLatency;
1387 }
1388 
1389 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1390 /// the Top RP tracker in case the region beginning has changed.
1392  ArrayRef<SUnit*> BotRoots) {
1393  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1394  if (ShouldTrackPressure) {
1395  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1396  TopRPTracker.setPos(CurrentTop);
1397  }
1398 }
1399 
1400 /// Move an instruction and update register pressure.
1401 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1402  // Move the instruction to its new location in the instruction stream.
1403  MachineInstr *MI = SU->getInstr();
1404 
1405  if (IsTopNode) {
1406  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1407  if (&*CurrentTop == MI)
1408  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1409  else {
1410  moveInstruction(MI, CurrentTop);
1411  TopRPTracker.setPos(MI);
1412  }
1413 
1414  if (ShouldTrackPressure) {
1415  // Update top scheduled pressure.
1416  RegisterOperands RegOpers;
1417  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1418  if (ShouldTrackLaneMasks) {
1419  // Adjust liveness and add missing dead+read-undef flags.
1420  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1421  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1422  } else {
1423  // Adjust for missing dead-def flags.
1424  RegOpers.detectDeadDefs(*MI, *LIS);
1425  }
1426 
1427  TopRPTracker.advance(RegOpers);
1428  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1429  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1430  TopRPTracker.getRegSetPressureAtPos(), TRI););
1431 
1432  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1433  }
1434  } else {
1435  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1436  MachineBasicBlock::iterator priorII =
1437  priorNonDebug(CurrentBottom, CurrentTop);
1438  if (&*priorII == MI)
1439  CurrentBottom = priorII;
1440  else {
1441  if (&*CurrentTop == MI) {
1442  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1443  TopRPTracker.setPos(CurrentTop);
1444  }
1445  moveInstruction(MI, CurrentBottom);
1446  CurrentBottom = MI;
1447  BotRPTracker.setPos(CurrentBottom);
1448  }
1449  if (ShouldTrackPressure) {
1450  RegisterOperands RegOpers;
1451  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1452  if (ShouldTrackLaneMasks) {
1453  // Adjust liveness and add missing dead+read-undef flags.
1454  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1455  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1456  } else {
1457  // Adjust for missing dead-def flags.
1458  RegOpers.detectDeadDefs(*MI, *LIS);
1459  }
1460 
1461  if (BotRPTracker.getPos() != CurrentBottom)
1462  BotRPTracker.recedeSkipDebugValues();
1464  BotRPTracker.recede(RegOpers, &LiveUses);
1465  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1466  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1467  BotRPTracker.getRegSetPressureAtPos(), TRI););
1468 
1469  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1470  updatePressureDiffs(LiveUses);
1471  }
1472  }
1473 }
1474 
1475 //===----------------------------------------------------------------------===//
1476 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1477 //===----------------------------------------------------------------------===//
1478 
1479 namespace {
1480 
1481 /// Post-process the DAG to create cluster edges between neighboring
1482 /// loads or between neighboring stores.
1483 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1484  struct MemOpInfo {
1485  SUnit *SU;
1486  MachineOperand *BaseOp;
1487  int64_t Offset;
1488 
1489  MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
1490  : SU(su), BaseOp(Op), Offset(ofs) {}
1491 
1492  bool operator<(const MemOpInfo &RHS) const {
1493  if (BaseOp->getType() != RHS.BaseOp->getType())
1494  return BaseOp->getType() < RHS.BaseOp->getType();
1495 
1496  if (BaseOp->isReg())
1497  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1498  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1499  RHS.SU->NodeNum);
1500  if (BaseOp->isFI()) {
1501  const MachineFunction &MF =
1502  *BaseOp->getParent()->getParent()->getParent();
1503  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1504  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1506  // Can't use tuple comparison here since we might need to use a
1507  // different order when the stack grows down.
1508  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1509  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1510  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1511 
1512  if (Offset != RHS.Offset)
1513  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1514 
1515  return SU->NodeNum < RHS.SU->NodeNum;
1516  }
1517 
1518  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1519  "index bases.");
1520  }
1521  };
1522 
1523  const TargetInstrInfo *TII;
1524  const TargetRegisterInfo *TRI;
1525  bool IsLoad;
1526 
1527 public:
1528  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1529  const TargetRegisterInfo *tri, bool IsLoad)
1530  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1531 
1532  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1533 
1534 protected:
1535  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1536 };
1537 
1538 class StoreClusterMutation : public BaseMemOpClusterMutation {
1539 public:
1540  StoreClusterMutation(const TargetInstrInfo *tii,
1541  const TargetRegisterInfo *tri)
1542  : BaseMemOpClusterMutation(tii, tri, false) {}
1543 };
1544 
1545 class LoadClusterMutation : public BaseMemOpClusterMutation {
1546 public:
1547  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1548  : BaseMemOpClusterMutation(tii, tri, true) {}
1549 };
1550 
1551 } // end anonymous namespace
1552 
1553 namespace llvm {
1554 
1555 std::unique_ptr<ScheduleDAGMutation>
1557  const TargetRegisterInfo *TRI) {
1558  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1559  : nullptr;
1560 }
1561 
1562 std::unique_ptr<ScheduleDAGMutation>
1564  const TargetRegisterInfo *TRI) {
1565  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1566  : nullptr;
1567 }
1568 
1569 } // end namespace llvm
1570 
1571 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1572  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1573  SmallVector<MemOpInfo, 32> MemOpRecords;
1574  for (SUnit *SU : MemOps) {
1575  MachineOperand *BaseOp;
1576  int64_t Offset;
1577  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1578  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1579  }
1580  if (MemOpRecords.size() < 2)
1581  return;
1582 
1583  llvm::sort(MemOpRecords);
1584  unsigned ClusterLength = 1;
1585  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1586  SUnit *SUa = MemOpRecords[Idx].SU;
1587  SUnit *SUb = MemOpRecords[Idx+1].SU;
1588  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1589  *MemOpRecords[Idx + 1].BaseOp,
1590  ClusterLength) &&
1591  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1592  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1593  << SUb->NodeNum << ")\n");
1594  // Copy successor edges from SUa to SUb. Interleaving computation
1595  // dependent on SUa can prevent load combining due to register reuse.
1596  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1597  // loads should have effectively the same inputs.
1598  for (const SDep &Succ : SUa->Succs) {
1599  if (Succ.getSUnit() == SUb)
1600  continue;
1601  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1602  << ")\n");
1603  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1604  }
1605  ++ClusterLength;
1606  } else
1607  ClusterLength = 1;
1608  }
1609 }
1610 
1611 /// Callback from DAG postProcessing to create cluster edges for loads.
1613  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1614 
1615  // Map DAG NodeNum to store chain ID.
1616  DenseMap<unsigned, unsigned> StoreChainIDs;
1617  // Map each store chain to a set of dependent MemOps.
1618  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1619  for (SUnit &SU : DAG->SUnits) {
1620  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1621  (!IsLoad && !SU.getInstr()->mayStore()))
1622  continue;
1623 
1624  unsigned ChainPredID = DAG->SUnits.size();
1625  for (const SDep &Pred : SU.Preds) {
1626  if (Pred.isCtrl()) {
1627  ChainPredID = Pred.getSUnit()->NodeNum;
1628  break;
1629  }
1630  }
1631  // Check if this chain-like pred has been seen
1632  // before. ChainPredID==MaxNodeID at the top of the schedule.
1633  unsigned NumChains = StoreChainDependents.size();
1634  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1635  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1636  if (Result.second)
1637  StoreChainDependents.resize(NumChains + 1);
1638  StoreChainDependents[Result.first->second].push_back(&SU);
1639  }
1640 
1641  // Iterate over the store chains.
1642  for (auto &SCD : StoreChainDependents)
1643  clusterNeighboringMemOps(SCD, DAG);
1644 }
1645 
1646 //===----------------------------------------------------------------------===//
1647 // CopyConstrain - DAG post-processing to encourage copy elimination.
1648 //===----------------------------------------------------------------------===//
1649 
1650 namespace {
1651 
1652 /// Post-process the DAG to create weak edges from all uses of a copy to
1653 /// the one use that defines the copy's source vreg, most likely an induction
1654 /// variable increment.
1655 class CopyConstrain : public ScheduleDAGMutation {
1656  // Transient state.
1657  SlotIndex RegionBeginIdx;
1658 
1659  // RegionEndIdx is the slot index of the last non-debug instruction in the
1660  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1661  SlotIndex RegionEndIdx;
1662 
1663 public:
1664  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1665 
1666  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1667 
1668 protected:
1669  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1670 };
1671 
1672 } // end anonymous namespace
1673 
1674 namespace llvm {
1675 
1676 std::unique_ptr<ScheduleDAGMutation>
1678  const TargetRegisterInfo *TRI) {
1679  return llvm::make_unique<CopyConstrain>(TII, TRI);
1680 }
1681 
1682 } // end namespace llvm
1683 
1684 /// constrainLocalCopy handles two possibilities:
1685 /// 1) Local src:
1686 /// I0: = dst
1687 /// I1: src = ...
1688 /// I2: = dst
1689 /// I3: dst = src (copy)
1690 /// (create pred->succ edges I0->I1, I2->I1)
1691 ///
1692 /// 2) Local copy:
1693 /// I0: dst = src (copy)
1694 /// I1: = dst
1695 /// I2: src = ...
1696 /// I3: = dst
1697 /// (create pred->succ edges I1->I2, I3->I2)
1698 ///
1699 /// Although the MachineScheduler is currently constrained to single blocks,
1700 /// this algorithm should handle extended blocks. An EBB is a set of
1701 /// contiguously numbered blocks such that the previous block in the EBB is
1702 /// always the single predecessor.
1703 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1704  LiveIntervals *LIS = DAG->getLIS();
1705  MachineInstr *Copy = CopySU->getInstr();
1706 
1707  // Check for pure vreg copies.
1708  const MachineOperand &SrcOp = Copy->getOperand(1);
1709  unsigned SrcReg = SrcOp.getReg();
1710  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1711  return;
1712 
1713  const MachineOperand &DstOp = Copy->getOperand(0);
1714  unsigned DstReg = DstOp.getReg();
1715  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1716  return;
1717 
1718  // Check if either the dest or source is local. If it's live across a back
1719  // edge, it's not local. Note that if both vregs are live across the back
1720  // edge, we cannot successfully contrain the copy without cyclic scheduling.
1721  // If both the copy's source and dest are local live intervals, then we
1722  // should treat the dest as the global for the purpose of adding
1723  // constraints. This adds edges from source's other uses to the copy.
1724  unsigned LocalReg = SrcReg;
1725  unsigned GlobalReg = DstReg;
1726  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1727  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1728  LocalReg = DstReg;
1729  GlobalReg = SrcReg;
1730  LocalLI = &LIS->getInterval(LocalReg);
1731  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1732  return;
1733  }
1734  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1735 
1736  // Find the global segment after the start of the local LI.
1737  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1738  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1739  // local live range. We could create edges from other global uses to the local
1740  // start, but the coalescer should have already eliminated these cases, so
1741  // don't bother dealing with it.
1742  if (GlobalSegment == GlobalLI->end())
1743  return;
1744 
1745  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1746  // returned the next global segment. But if GlobalSegment overlaps with
1747  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1748  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1749  if (GlobalSegment->contains(LocalLI->beginIndex()))
1750  ++GlobalSegment;
1751 
1752  if (GlobalSegment == GlobalLI->end())
1753  return;
1754 
1755  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1756  if (GlobalSegment != GlobalLI->begin()) {
1757  // Two address defs have no hole.
1758  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1759  GlobalSegment->start)) {
1760  return;
1761  }
1762  // If the prior global segment may be defined by the same two-address
1763  // instruction that also defines LocalLI, then can't make a hole here.
1764  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1765  LocalLI->beginIndex())) {
1766  return;
1767  }
1768  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1769  // it would be a disconnected component in the live range.
1770  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1771  "Disconnected LRG within the scheduling region.");
1772  }
1773  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1774  if (!GlobalDef)
1775  return;
1776 
1777  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1778  if (!GlobalSU)
1779  return;
1780 
1781  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1782  // constraining the uses of the last local def to precede GlobalDef.
1783  SmallVector<SUnit*,8> LocalUses;
1784  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1785  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1786  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1787  for (const SDep &Succ : LastLocalSU->Succs) {
1788  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1789  continue;
1790  if (Succ.getSUnit() == GlobalSU)
1791  continue;
1792  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1793  return;
1794  LocalUses.push_back(Succ.getSUnit());
1795  }
1796  // Open the top of the GlobalLI hole by constraining any earlier global uses
1797  // to precede the start of LocalLI.
1798  SmallVector<SUnit*,8> GlobalUses;
1799  MachineInstr *FirstLocalDef =
1800  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1801  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1802  for (const SDep &Pred : GlobalSU->Preds) {
1803  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1804  continue;
1805  if (Pred.getSUnit() == FirstLocalSU)
1806  continue;
1807  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1808  return;
1809  GlobalUses.push_back(Pred.getSUnit());
1810  }
1811  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1812  // Add the weak edges.
1814  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1815  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1816  << GlobalSU->NodeNum << ")\n");
1817  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1818  }
1820  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1821  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1822  << FirstLocalSU->NodeNum << ")\n");
1823  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1824  }
1825 }
1826 
1827 /// Callback from DAG postProcessing to create weak edges to encourage
1828 /// copy elimination.
1829 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1830  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1831  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1832 
1833  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1834  if (FirstPos == DAG->end())
1835  return;
1836  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1837  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1838  *priorNonDebug(DAG->end(), DAG->begin()));
1839 
1840  for (SUnit &SU : DAG->SUnits) {
1841  if (!SU.getInstr()->isCopy())
1842  continue;
1843 
1844  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1845  }
1846 }
1847 
1848 //===----------------------------------------------------------------------===//
1849 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1850 // and possibly other custom schedulers.
1851 //===----------------------------------------------------------------------===//
1852 
1853 static const unsigned InvalidCycle = ~0U;
1854 
1855 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1856 
1857 /// Given a Count of resource usage and a Latency value, return true if a
1858 /// SchedBoundary becomes resource limited.
1859 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1860  unsigned Latency) {
1861  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1862 }
1863 
1865  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1866  // Destroying and reconstructing it is very expensive though. So keep
1867  // invalid, placeholder HazardRecs.
1868  if (HazardRec && HazardRec->isEnabled()) {
1869  delete HazardRec;
1870  HazardRec = nullptr;
1871  }
1872  Available.clear();
1873  Pending.clear();
1874  CheckPending = false;
1875  CurrCycle = 0;
1876  CurrMOps = 0;
1877  MinReadyCycle = std::numeric_limits<unsigned>::max();
1878  ExpectedLatency = 0;
1879  DependentLatency = 0;
1880  RetiredMOps = 0;
1881  MaxExecutedResCount = 0;
1882  ZoneCritResIdx = 0;
1883  IsResourceLimited = false;
1884  ReservedCycles.clear();
1885 #ifndef NDEBUG
1886  // Track the maximum number of stall cycles that could arise either from the
1887  // latency of a DAG edge or the number of cycles that a processor resource is
1888  // reserved (SchedBoundary::ReservedCycles).
1889  MaxObservedStall = 0;
1890 #endif
1891  // Reserve a zero-count for invalid CritResIdx.
1892  ExecutedResCounts.resize(1);
1893  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1894 }
1895 
1896 void SchedRemainder::
1897 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1898  reset();
1899  if (!SchedModel->hasInstrSchedModel())
1900  return;
1901  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1902  for (SUnit &SU : DAG->SUnits) {
1903  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1904  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1905  * SchedModel->getMicroOpFactor();
1907  PI = SchedModel->getWriteProcResBegin(SC),
1908  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1909  unsigned PIdx = PI->ProcResourceIdx;
1910  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1911  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1912  }
1913  }
1914 }
1915 
1916 void SchedBoundary::
1918  reset();
1919  DAG = dag;
1920  SchedModel = smodel;
1921  Rem = rem;
1922  if (SchedModel->hasInstrSchedModel()) {
1923  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1924  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1925  }
1926 }
1927 
1928 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1929 /// these "soft stalls" differently than the hard stall cycles based on CPU
1930 /// resources and computed by checkHazard(). A fully in-order model
1931 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1932 /// available for scheduling until they are ready. However, a weaker in-order
1933 /// model may use this for heuristics. For example, if a processor has in-order
1934 /// behavior when reading certain resources, this may come into play.
1936  if (!SU->isUnbuffered)
1937  return 0;
1938 
1939  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1940  if (ReadyCycle > CurrCycle)
1941  return ReadyCycle - CurrCycle;
1942  return 0;
1943 }
1944 
1945 /// Compute the next cycle at which the given processor resource can be
1946 /// scheduled.
1947 unsigned SchedBoundary::
1948 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1949  unsigned NextUnreserved = ReservedCycles[PIdx];
1950  // If this resource has never been used, always return cycle zero.
1951  if (NextUnreserved == InvalidCycle)
1952  return 0;
1953  // For bottom-up scheduling add the cycles needed for the current operation.
1954  if (!isTop())
1955  NextUnreserved += Cycles;
1956  return NextUnreserved;
1957 }
1958 
1959 /// Does this SU have a hazard within the current instruction group.
1960 ///
1961 /// The scheduler supports two modes of hazard recognition. The first is the
1962 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1963 /// supports highly complicated in-order reservation tables
1964 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1965 ///
1966 /// The second is a streamlined mechanism that checks for hazards based on
1967 /// simple counters that the scheduler itself maintains. It explicitly checks
1968 /// for instruction dispatch limitations, including the number of micro-ops that
1969 /// can dispatch per cycle.
1970 ///
1971 /// TODO: Also check whether the SU must start a new group.
1973  if (HazardRec->isEnabled()
1974  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1975  return true;
1976  }
1977 
1978  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1979  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1980  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1981  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1982  return true;
1983  }
1984 
1985  if (CurrMOps > 0 &&
1986  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1987  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1988  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1989  << (isTop() ? "begin" : "end") << " group\n");
1990  return true;
1991  }
1992 
1993  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1994  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1995  for (const MCWriteProcResEntry &PE :
1996  make_range(SchedModel->getWriteProcResBegin(SC),
1997  SchedModel->getWriteProcResEnd(SC))) {
1998  unsigned ResIdx = PE.ProcResourceIdx;
1999  unsigned Cycles = PE.Cycles;
2000  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
2001  if (NRCycle > CurrCycle) {
2002 #ifndef NDEBUG
2003  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2004 #endif
2005  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2006  << SchedModel->getResourceName(ResIdx) << "="
2007  << NRCycle << "c\n");
2008  return true;
2009  }
2010  }
2011  }
2012  return false;
2013 }
2014 
2015 // Find the unscheduled node in ReadySUs with the highest latency.
2016 unsigned SchedBoundary::
2018  SUnit *LateSU = nullptr;
2019  unsigned RemLatency = 0;
2020  for (SUnit *SU : ReadySUs) {
2021  unsigned L = getUnscheduledLatency(SU);
2022  if (L > RemLatency) {
2023  RemLatency = L;
2024  LateSU = SU;
2025  }
2026  }
2027  if (LateSU) {
2028  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2029  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2030  }
2031  return RemLatency;
2032 }
2033 
2034 // Count resources in this zone and the remaining unscheduled
2035 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2036 // resource index, or zero if the zone is issue limited.
2037 unsigned SchedBoundary::
2038 getOtherResourceCount(unsigned &OtherCritIdx) {
2039  OtherCritIdx = 0;
2040  if (!SchedModel->hasInstrSchedModel())
2041  return 0;
2042 
2043  unsigned OtherCritCount = Rem->RemIssueCount
2044  + (RetiredMOps * SchedModel->getMicroOpFactor());
2045  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2046  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2047  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2048  PIdx != PEnd; ++PIdx) {
2049  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2050  if (OtherCount > OtherCritCount) {
2051  OtherCritCount = OtherCount;
2052  OtherCritIdx = PIdx;
2053  }
2054  }
2055  if (OtherCritIdx) {
2056  LLVM_DEBUG(
2057  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2058  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2059  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2060  }
2061  return OtherCritCount;
2062 }
2063 
2064 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2065  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2066 
2067 #ifndef NDEBUG
2068  // ReadyCycle was been bumped up to the CurrCycle when this node was
2069  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2070  // scheduling, so may now be greater than ReadyCycle.
2071  if (ReadyCycle > CurrCycle)
2072  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2073 #endif
2074 
2075  if (ReadyCycle < MinReadyCycle)
2076  MinReadyCycle = ReadyCycle;
2077 
2078  // Check for interlocks first. For the purpose of other heuristics, an
2079  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2080  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2081  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2082  Available.size() >= ReadyListLimit)
2083  Pending.push(SU);
2084  else
2085  Available.push(SU);
2086 }
2087 
2088 /// Move the boundary of scheduled code by one cycle.
2089 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2090  if (SchedModel->getMicroOpBufferSize() == 0) {
2091  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2092  "MinReadyCycle uninitialized");
2093  if (MinReadyCycle > NextCycle)
2094  NextCycle = MinReadyCycle;
2095  }
2096  // Update the current micro-ops, which will issue in the next cycle.
2097  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2098  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2099 
2100  // Decrement DependentLatency based on the next cycle.
2101  if ((NextCycle - CurrCycle) > DependentLatency)
2102  DependentLatency = 0;
2103  else
2104  DependentLatency -= (NextCycle - CurrCycle);
2105 
2106  if (!HazardRec->isEnabled()) {
2107  // Bypass HazardRec virtual calls.
2108  CurrCycle = NextCycle;
2109  } else {
2110  // Bypass getHazardType calls in case of long latency.
2111  for (; CurrCycle != NextCycle; ++CurrCycle) {
2112  if (isTop())
2113  HazardRec->AdvanceCycle();
2114  else
2115  HazardRec->RecedeCycle();
2116  }
2117  }
2118  CheckPending = true;
2119  IsResourceLimited =
2120  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2121  getScheduledLatency());
2122 
2123  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2124  << '\n');
2125 }
2126 
2127 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2128  ExecutedResCounts[PIdx] += Count;
2129  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2130  MaxExecutedResCount = ExecutedResCounts[PIdx];
2131 }
2132 
2133 /// Add the given processor resource to this scheduled zone.
2134 ///
2135 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2136 /// during which this resource is consumed.
2137 ///
2138 /// \return the next cycle at which the instruction may execute without
2139 /// oversubscribing resources.
2140 unsigned SchedBoundary::
2141 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2142  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2143  unsigned Count = Factor * Cycles;
2144  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2145  << Cycles << "x" << Factor << "u\n");
2146 
2147  // Update Executed resources counts.
2148  incExecutedResources(PIdx, Count);
2149  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2150  Rem->RemainingCounts[PIdx] -= Count;
2151 
2152  // Check if this resource exceeds the current critical resource. If so, it
2153  // becomes the critical resource.
2154  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2155  ZoneCritResIdx = PIdx;
2156  LLVM_DEBUG(dbgs() << " *** Critical resource "
2157  << SchedModel->getResourceName(PIdx) << ": "
2158  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2159  << "c\n");
2160  }
2161  // For reserved resources, record the highest cycle using the resource.
2162  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2163  if (NextAvailable > CurrCycle) {
2164  LLVM_DEBUG(dbgs() << " Resource conflict: "
2165  << SchedModel->getProcResource(PIdx)->Name
2166  << " reserved until @" << NextAvailable << "\n");
2167  }
2168  return NextAvailable;
2169 }
2170 
2171 /// Move the boundary of scheduled code by one SUnit.
2173  // Update the reservation table.
2174  if (HazardRec->isEnabled()) {
2175  if (!isTop() && SU->isCall) {
2176  // Calls are scheduled with their preceding instructions. For bottom-up
2177  // scheduling, clear the pipeline state before emitting.
2178  HazardRec->Reset();
2179  }
2180  HazardRec->EmitInstruction(SU);
2181  }
2182  // checkHazard should prevent scheduling multiple instructions per cycle that
2183  // exceed the issue width.
2184  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2185  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2186  assert(
2187  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2188  "Cannot schedule this instruction's MicroOps in the current cycle.");
2189 
2190  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2191  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2192 
2193  unsigned NextCycle = CurrCycle;
2194  switch (SchedModel->getMicroOpBufferSize()) {
2195  case 0:
2196  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2197  break;
2198  case 1:
2199  if (ReadyCycle > NextCycle) {
2200  NextCycle = ReadyCycle;
2201  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2202  }
2203  break;
2204  default:
2205  // We don't currently model the OOO reorder buffer, so consider all
2206  // scheduled MOps to be "retired". We do loosely model in-order resource
2207  // latency. If this instruction uses an in-order resource, account for any
2208  // likely stall cycles.
2209  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2210  NextCycle = ReadyCycle;
2211  break;
2212  }
2213  RetiredMOps += IncMOps;
2214 
2215  // Update resource counts and critical resource.
2216  if (SchedModel->hasInstrSchedModel()) {
2217  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2218  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2219  Rem->RemIssueCount -= DecRemIssue;
2220  if (ZoneCritResIdx) {
2221  // Scale scheduled micro-ops for comparing with the critical resource.
2222  unsigned ScaledMOps =
2223  RetiredMOps * SchedModel->getMicroOpFactor();
2224 
2225  // If scaled micro-ops are now more than the previous critical resource by
2226  // a full cycle, then micro-ops issue becomes critical.
2227  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2228  >= (int)SchedModel->getLatencyFactor()) {
2229  ZoneCritResIdx = 0;
2230  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2231  << ScaledMOps / SchedModel->getLatencyFactor()
2232  << "c\n");
2233  }
2234  }
2236  PI = SchedModel->getWriteProcResBegin(SC),
2237  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2238  unsigned RCycle =
2239  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2240  if (RCycle > NextCycle)
2241  NextCycle = RCycle;
2242  }
2243  if (SU->hasReservedResource) {
2244  // For reserved resources, record the highest cycle using the resource.
2245  // For top-down scheduling, this is the cycle in which we schedule this
2246  // instruction plus the number of cycles the operations reserves the
2247  // resource. For bottom-up is it simply the instruction's cycle.
2249  PI = SchedModel->getWriteProcResBegin(SC),
2250  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2251  unsigned PIdx = PI->ProcResourceIdx;
2252  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2253  if (isTop()) {
2254  ReservedCycles[PIdx] =
2255  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2256  }
2257  else
2258  ReservedCycles[PIdx] = NextCycle;
2259  }
2260  }
2261  }
2262  }
2263  // Update ExpectedLatency and DependentLatency.
2264  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2265  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2266  if (SU->getDepth() > TopLatency) {
2267  TopLatency = SU->getDepth();
2268  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2269  << SU->NodeNum << ") " << TopLatency << "c\n");
2270  }
2271  if (SU->getHeight() > BotLatency) {
2272  BotLatency = SU->getHeight();
2273  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2274  << SU->NodeNum << ") " << BotLatency << "c\n");
2275  }
2276  // If we stall for any reason, bump the cycle.
2277  if (NextCycle > CurrCycle)
2278  bumpCycle(NextCycle);
2279  else
2280  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2281  // resource limited. If a stall occurred, bumpCycle does this.
2282  IsResourceLimited =
2283  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2284  getScheduledLatency());
2285 
2286  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2287  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2288  // one cycle. Since we commonly reach the max MOps here, opportunistically
2289  // bump the cycle to avoid uselessly checking everything in the readyQ.
2290  CurrMOps += IncMOps;
2291 
2292  // Bump the cycle count for issue group constraints.
2293  // This must be done after NextCycle has been adjust for all other stalls.
2294  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2295  // currCycle to X.
2296  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2297  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2298  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2299  << " group\n");
2300  bumpCycle(++NextCycle);
2301  }
2302 
2303  while (CurrMOps >= SchedModel->getIssueWidth()) {
2304  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2305  << CurrCycle << '\n');
2306  bumpCycle(++NextCycle);
2307  }
2308  LLVM_DEBUG(dumpScheduledState());
2309 }
2310 
2311 /// Release pending ready nodes in to the available queue. This makes them
2312 /// visible to heuristics.
2314  // If the available queue is empty, it is safe to reset MinReadyCycle.
2315  if (Available.empty())
2316  MinReadyCycle = std::numeric_limits<unsigned>::max();
2317 
2318  // Check to see if any of the pending instructions are ready to issue. If
2319  // so, add them to the available queue.
2320  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2321  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2322  SUnit *SU = *(Pending.begin()+i);
2323  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2324 
2325  if (ReadyCycle < MinReadyCycle)
2326  MinReadyCycle = ReadyCycle;
2327 
2328  if (!IsBuffered && ReadyCycle > CurrCycle)
2329  continue;
2330 
2331  if (checkHazard(SU))
2332  continue;
2333 
2334  if (Available.size() >= ReadyListLimit)
2335  break;
2336 
2337  Available.push(SU);
2338  Pending.remove(Pending.begin()+i);
2339  --i; --e;
2340  }
2341  CheckPending = false;
2342 }
2343 
2344 /// Remove SU from the ready set for this boundary.
2346  if (Available.isInQueue(SU))
2347  Available.remove(Available.find(SU));
2348  else {
2349  assert(Pending.isInQueue(SU) && "bad ready count");
2350  Pending.remove(Pending.find(SU));
2351  }
2352 }
2353 
2354 /// If this queue only has one ready candidate, return it. As a side effect,
2355 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2356 /// one node is ready. If multiple instructions are ready, return NULL.
2358  if (CheckPending)
2359  releasePending();
2360 
2361  if (CurrMOps > 0) {
2362  // Defer any ready instrs that now have a hazard.
2363  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2364  if (checkHazard(*I)) {
2365  Pending.push(*I);
2366  I = Available.remove(I);
2367  continue;
2368  }
2369  ++I;
2370  }
2371  }
2372  for (unsigned i = 0; Available.empty(); ++i) {
2373 // FIXME: Re-enable assert once PR20057 is resolved.
2374 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2375 // "permanent hazard");
2376  (void)i;
2377  bumpCycle(CurrCycle + 1);
2378  releasePending();
2379  }
2380 
2381  LLVM_DEBUG(Pending.dump());
2382  LLVM_DEBUG(Available.dump());
2383 
2384  if (Available.size() == 1)
2385  return *Available.begin();
2386  return nullptr;
2387 }
2388 
2389 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2390 // This is useful information to dump after bumpNode.
2391 // Note that the Queue contents are more useful before pickNodeFromQueue.
2393  unsigned ResFactor;
2394  unsigned ResCount;
2395  if (ZoneCritResIdx) {
2396  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2397  ResCount = getResourceCount(ZoneCritResIdx);
2398  } else {
2399  ResFactor = SchedModel->getMicroOpFactor();
2400  ResCount = RetiredMOps * ResFactor;
2401  }
2402  unsigned LFactor = SchedModel->getLatencyFactor();
2403  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2404  << " Retired: " << RetiredMOps;
2405  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2406  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2407  << ResCount / ResFactor << " "
2408  << SchedModel->getResourceName(ZoneCritResIdx)
2409  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2410  << (IsResourceLimited ? " - Resource" : " - Latency")
2411  << " limited.\n";
2412 }
2413 #endif
2414 
2415 //===----------------------------------------------------------------------===//
2416 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2417 //===----------------------------------------------------------------------===//
2418 
2421  const TargetSchedModel *SchedModel) {
2422  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2423  return;
2424 
2425  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2427  PI = SchedModel->getWriteProcResBegin(SC),
2428  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2429  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2430  ResDelta.CritResources += PI->Cycles;
2431  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2432  ResDelta.DemandedResources += PI->Cycles;
2433  }
2434 }
2435 
2436 /// Compute remaining latency. We need this both to determine whether the
2437 /// overall schedule has become latency-limited and whether the instructions
2438 /// outside this zone are resource or latency limited.
2439 ///
2440 /// The "dependent" latency is updated incrementally during scheduling as the
2441 /// max height/depth of scheduled nodes minus the cycles since it was
2442 /// scheduled:
2443 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2444 ///
2445 /// The "independent" latency is the max ready queue depth:
2446 /// ILat = max N.depth for N in Available|Pending
2447 ///
2448 /// RemainingLatency is the greater of independent and dependent latency.
2449 ///
2450 /// These computations are expensive, especially in DAGs with many edges, so
2451 /// only do them if necessary.
2452 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2453  unsigned RemLatency = CurrZone.getDependentLatency();
2454  RemLatency = std::max(RemLatency,
2455  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2456  RemLatency = std::max(RemLatency,
2457  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2458  return RemLatency;
2459 }
2460 
2461 /// Returns true if the current cycle plus remaning latency is greater than
2462 /// the critical path in the scheduling region.
2463 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2464  SchedBoundary &CurrZone,
2465  bool ComputeRemLatency,
2466  unsigned &RemLatency) const {
2467  // The current cycle is already greater than the critical path, so we are
2468  // already latency limited and don't need to compute the remaining latency.
2469  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2470  return true;
2471 
2472  // If we haven't scheduled anything yet, then we aren't latency limited.
2473  if (CurrZone.getCurrCycle() == 0)
2474  return false;
2475 
2476  if (ComputeRemLatency)
2477  RemLatency = computeRemLatency(CurrZone);
2478 
2479  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2480 }
2481 
2482 /// Set the CandPolicy given a scheduling zone given the current resources and
2483 /// latencies inside and outside the zone.
2484 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2485  SchedBoundary &CurrZone,
2486  SchedBoundary *OtherZone) {
2487  // Apply preemptive heuristics based on the total latency and resources
2488  // inside and outside this zone. Potential stalls should be considered before
2489  // following this policy.
2490 
2491  // Compute the critical resource outside the zone.
2492  unsigned OtherCritIdx = 0;
2493  unsigned OtherCount =
2494  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2495 
2496  bool OtherResLimited = false;
2497  unsigned RemLatency = 0;
2498  bool RemLatencyComputed = false;
2499  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2500  RemLatency = computeRemLatency(CurrZone);
2501  RemLatencyComputed = true;
2502  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2503  OtherCount, RemLatency);
2504  }
2505 
2506  // Schedule aggressively for latency in PostRA mode. We don't check for
2507  // acyclic latency during PostRA, and highly out-of-order processors will
2508  // skip PostRA scheduling.
2509  if (!OtherResLimited &&
2510  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2511  RemLatency))) {
2512  Policy.ReduceLatency |= true;
2513  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2514  << " RemainingLatency " << RemLatency << " + "
2515  << CurrZone.getCurrCycle() << "c > CritPath "
2516  << Rem.CriticalPath << "\n");
2517  }
2518  // If the same resource is limiting inside and outside the zone, do nothing.
2519  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2520  return;
2521 
2522  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2523  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2524  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2525  } if (OtherResLimited) dbgs()
2526  << " RemainingLimit: "
2527  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2528  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2529  << " Latency limited both directions.\n");
2530 
2531  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2532  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2533 
2534  if (OtherResLimited)
2535  Policy.DemandResIdx = OtherCritIdx;
2536 }
2537 
2538 #ifndef NDEBUG
2541  switch (Reason) {
2542  case NoCand: return "NOCAND ";
2543  case Only1: return "ONLY1 ";
2544  case PhysReg: return "PHYS-REG ";
2545  case RegExcess: return "REG-EXCESS";
2546  case RegCritical: return "REG-CRIT ";
2547  case Stall: return "STALL ";
2548  case Cluster: return "CLUSTER ";
2549  case Weak: return "WEAK ";
2550  case RegMax: return "REG-MAX ";
2551  case ResourceReduce: return "RES-REDUCE";
2552  case ResourceDemand: return "RES-DEMAND";
2553  case TopDepthReduce: return "TOP-DEPTH ";
2554  case TopPathReduce: return "TOP-PATH ";
2555  case BotHeightReduce:return "BOT-HEIGHT";
2556  case BotPathReduce: return "BOT-PATH ";
2557  case NextDefUse: return "DEF-USE ";
2558  case NodeOrder: return "ORDER ";
2559  };
2560  llvm_unreachable("Unknown reason!");
2561 }
2562 
2564  PressureChange P;
2565  unsigned ResIdx = 0;
2566  unsigned Latency = 0;
2567  switch (Cand.Reason) {
2568  default:
2569  break;
2570  case RegExcess:
2571  P = Cand.RPDelta.Excess;
2572  break;
2573  case RegCritical:
2574  P = Cand.RPDelta.CriticalMax;
2575  break;
2576  case RegMax:
2577  P = Cand.RPDelta.CurrentMax;
2578  break;
2579  case ResourceReduce:
2580  ResIdx = Cand.Policy.ReduceResIdx;
2581  break;
2582  case ResourceDemand:
2583  ResIdx = Cand.Policy.DemandResIdx;
2584  break;
2585  case TopDepthReduce:
2586  Latency = Cand.SU->getDepth();
2587  break;
2588  case TopPathReduce:
2589  Latency = Cand.SU->getHeight();
2590  break;
2591  case BotHeightReduce:
2592  Latency = Cand.SU->getHeight();
2593  break;
2594  case BotPathReduce:
2595  Latency = Cand.SU->getDepth();
2596  break;
2597  }
2598  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2599  if (P.isValid())
2600  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2601  << ":" << P.getUnitInc() << " ";
2602  else
2603  dbgs() << " ";
2604  if (ResIdx)
2605  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2606  else
2607  dbgs() << " ";
2608  if (Latency)
2609  dbgs() << " " << Latency << " cycles ";
2610  else
2611  dbgs() << " ";
2612  dbgs() << '\n';
2613 }
2614 #endif
2615 
2616 namespace llvm {
2617 /// Return true if this heuristic determines order.
2618 bool tryLess(int TryVal, int CandVal,
2622  if (TryVal < CandVal) {
2623  TryCand.Reason = Reason;
2624  return true;
2625  }
2626  if (TryVal > CandVal) {
2627  if (Cand.Reason > Reason)
2628  Cand.Reason = Reason;
2629  return true;
2630  }
2631  return false;
2632 }
2633 
2634 bool tryGreater(int TryVal, int CandVal,
2638  if (TryVal > CandVal) {
2639  TryCand.Reason = Reason;
2640  return true;
2641  }
2642  if (TryVal < CandVal) {
2643  if (Cand.Reason > Reason)
2644  Cand.Reason = Reason;
2645  return true;
2646  }
2647  return false;
2648 }
2649 
2652  SchedBoundary &Zone) {
2653  if (Zone.isTop()) {
2654  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2655  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2656  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2657  return true;
2658  }
2659  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2660  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2661  return true;
2662  } else {
2663  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2664  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2665  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2666  return true;
2667  }
2668  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2669  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2670  return true;
2671  }
2672  return false;
2673 }
2674 } // end namespace llvm
2675 
2676 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2677  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2678  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2679 }
2680 
2682  tracePick(Cand.Reason, Cand.AtTop);
2683 }
2684 
2686  assert(dag->hasVRegLiveness() &&
2687  "(PreRA)GenericScheduler needs vreg liveness");
2688  DAG = static_cast<ScheduleDAGMILive*>(dag);
2689  SchedModel = DAG->getSchedModel();
2690  TRI = DAG->TRI;
2691 
2692  Rem.init(DAG, SchedModel);
2693  Top.init(DAG, SchedModel, &Rem);
2694  Bot.init(DAG, SchedModel, &Rem);
2695 
2696  // Initialize resource counts.
2697 
2698  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2699  // are disabled, then these HazardRecs will be disabled.
2700  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2701  if (!Top.HazardRec) {
2702  Top.HazardRec =
2703  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2704  Itin, DAG);
2705  }
2706  if (!Bot.HazardRec) {
2707  Bot.HazardRec =
2708  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2709  Itin, DAG);
2710  }
2711  TopCand.SU = nullptr;
2712  BotCand.SU = nullptr;
2713 }
2714 
2715 /// Initialize the per-region scheduling policy.
2718  unsigned NumRegionInstrs) {
2719  const MachineFunction &MF = *Begin->getMF();
2720  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2721 
2722  // Avoid setting up the register pressure tracker for small regions to save
2723  // compile time. As a rough heuristic, only track pressure when the number of
2724  // schedulable instructions exceeds half the integer register file.
2725  RegionPolicy.ShouldTrackPressure = true;
2726  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2727  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2728  if (TLI->isTypeLegal(LegalIntVT)) {
2729  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2730  TLI->getRegClassFor(LegalIntVT));
2731  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2732  }
2733  }
2734 
2735  // For generic targets, we default to bottom-up, because it's simpler and more
2736  // compile-time optimizations have been implemented in that direction.
2737  RegionPolicy.OnlyBottomUp = true;
2738 
2739  // Allow the subtarget to override default policy.
2740  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2741 
2742  // After subtarget overrides, apply command line options.
2743  if (!EnableRegPressure)
2744  RegionPolicy.ShouldTrackPressure = false;
2745 
2746  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2747  // e.g. -misched-bottomup=false allows scheduling in both directions.
2749  "-misched-topdown incompatible with -misched-bottomup");
2750  if (ForceBottomUp.getNumOccurrences() > 0) {
2751  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2752  if (RegionPolicy.OnlyBottomUp)
2753  RegionPolicy.OnlyTopDown = false;
2754  }
2755  if (ForceTopDown.getNumOccurrences() > 0) {
2756  RegionPolicy.OnlyTopDown = ForceTopDown;
2757  if (RegionPolicy.OnlyTopDown)
2758  RegionPolicy.OnlyBottomUp = false;
2759  }
2760 }
2761 
2763  // Cannot completely remove virtual function even in release mode.
2764 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2765  dbgs() << "GenericScheduler RegionPolicy: "
2766  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2767  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2768  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2769  << "\n";
2770 #endif
2771 }
2772 
2773 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2774 /// critical path by more cycles than it takes to drain the instruction buffer.
2775 /// We estimate an upper bounds on in-flight instructions as:
2776 ///
2777 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2778 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2779 /// InFlightResources = InFlightIterations * LoopResources
2780 ///
2781 /// TODO: Check execution resources in addition to IssueCount.
2783  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2784  return;
2785 
2786  // Scaled number of cycles per loop iteration.
2787  unsigned IterCount =
2788  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2789  Rem.RemIssueCount);
2790  // Scaled acyclic critical path.
2791  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2792  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2793  unsigned InFlightCount =
2794  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2795  unsigned BufferLimit =
2796  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2797 
2798  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2799 
2800  LLVM_DEBUG(
2801  dbgs() << "IssueCycles="
2802  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2803  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2804  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2805  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2806  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2807  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2808 }
2809 
2811  Rem.CriticalPath = DAG->ExitSU.getDepth();
2812 
2813  // Some roots may not feed into ExitSU. Check all of them in case.
2814  for (const SUnit *SU : Bot.Available) {
2815  if (SU->getDepth() > Rem.CriticalPath)
2816  Rem.CriticalPath = SU->getDepth();
2817  }
2818  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2819  if (DumpCriticalPathLength) {
2820  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2821  }
2822 
2823  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2824  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2825  checkAcyclicLatency();
2826  }
2827 }
2828 
2829 namespace llvm {
2830 bool tryPressure(const PressureChange &TryP,
2831  const PressureChange &CandP,
2835  const TargetRegisterInfo *TRI,
2836  const MachineFunction &MF) {
2837  // If one candidate decreases and the other increases, go with it.
2838  // Invalid candidates have UnitInc==0.
2839  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2840  Reason)) {
2841  return true;
2842  }
2843  // Do not compare the magnitude of pressure changes between top and bottom
2844  // boundary.
2845  if (Cand.AtTop != TryCand.AtTop)
2846  return false;
2847 
2848  // If both candidates affect the same set in the same boundary, go with the
2849  // smallest increase.
2850  unsigned TryPSet = TryP.getPSetOrMax();
2851  unsigned CandPSet = CandP.getPSetOrMax();
2852  if (TryPSet == CandPSet) {
2853  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2854  Reason);
2855  }
2856 
2857  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2859 
2860  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2862 
2863  // If the candidates are decreasing pressure, reverse priority.
2864  if (TryP.getUnitInc() < 0)
2865  std::swap(TryRank, CandRank);
2866  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2867 }
2868 
2869 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2870  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2871 }
2872 
2873 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2874 /// their physreg def/use.
2875 ///
2876 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2877 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2878 /// with the operation that produces or consumes the physreg. We'll do this when
2879 /// regalloc has support for parallel copies.
2880 int biasPhysReg(const SUnit *SU, bool isTop) {
2881  const MachineInstr *MI = SU->getInstr();
2882 
2883  if (MI->isCopy()) {
2884  unsigned ScheduledOper = isTop ? 1 : 0;
2885  unsigned UnscheduledOper = isTop ? 0 : 1;
2886  // If we have already scheduled the physreg produce/consumer, immediately
2887  // schedule the copy.
2889  MI->getOperand(ScheduledOper).getReg()))
2890  return 1;
2891  // If the physreg is at the boundary, defer it. Otherwise schedule it
2892  // immediately to free the dependent. We can hoist the copy later.
2893  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2895  MI->getOperand(UnscheduledOper).getReg()))
2896  return AtBoundary ? -1 : 1;
2897  }
2898 
2899  if (MI->isMoveImmediate()) {
2900  // If we have a move immediate and all successors have been assigned, bias
2901  // towards scheduling this later. Make sure all register defs are to
2902  // physical registers.
2903  bool DoBias = true;
2904  for (const MachineOperand &Op : MI->defs()) {
2905  if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
2906  DoBias = false;
2907  break;
2908  }
2909  }
2910 
2911  if (DoBias)
2912  return isTop ? -1 : 1;
2913  }
2914 
2915  return 0;
2916 }
2917 } // end namespace llvm
2918 
2920  bool AtTop,
2921  const RegPressureTracker &RPTracker,
2922  RegPressureTracker &TempTracker) {
2923  Cand.SU = SU;
2924  Cand.AtTop = AtTop;
2925  if (DAG->isTrackingPressure()) {
2926  if (AtTop) {
2927  TempTracker.getMaxDownwardPressureDelta(
2928  Cand.SU->getInstr(),
2929  Cand.RPDelta,
2930  DAG->getRegionCriticalPSets(),
2932  } else {
2933  if (VerifyScheduling) {
2934  TempTracker.getMaxUpwardPressureDelta(
2935  Cand.SU->getInstr(),
2936  &DAG->getPressureDiff(Cand.SU),
2937  Cand.RPDelta,
2938  DAG->getRegionCriticalPSets(),
2940  } else {
2941  RPTracker.getUpwardPressureDelta(
2942  Cand.SU->getInstr(),
2943  DAG->getPressureDiff(Cand.SU),
2944  Cand.RPDelta,
2945  DAG->getRegionCriticalPSets(),
2947  }
2948  }
2949  }
2950  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2951  << " Try SU(" << Cand.SU->NodeNum << ") "
2952  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2953  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2954 }
2955 
2956 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2957 /// hierarchical. This may be more efficient than a graduated cost model because
2958 /// we don't need to evaluate all aspects of the model for each node in the
2959 /// queue. But it's really done to make the heuristics easier to debug and
2960 /// statistically analyze.
2961 ///
2962 /// \param Cand provides the policy and current best candidate.
2963 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2964 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2965 // if Cand is from a different zone than TryCand.
2967  SchedCandidate &TryCand,
2968  SchedBoundary *Zone) const {
2969  // Initialize the candidate if needed.
2970  if (!Cand.isValid()) {
2971  TryCand.Reason = NodeOrder;
2972  return;
2973  }
2974 
2975  // Bias PhysReg Defs and copies to their uses and defined respectively.
2976  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
2977  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
2978  return;
2979 
2980  // Avoid exceeding the target's limit.
2981  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2982  Cand.RPDelta.Excess,
2983  TryCand, Cand, RegExcess, TRI,
2984  DAG->MF))
2985  return;
2986 
2987  // Avoid increasing the max critical pressure in the scheduled region.
2988  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2989  Cand.RPDelta.CriticalMax,
2990  TryCand, Cand, RegCritical, TRI,
2991  DAG->MF))
2992  return;
2993 
2994  // We only compare a subset of features when comparing nodes between
2995  // Top and Bottom boundary. Some properties are simply incomparable, in many
2996  // other instances we should only override the other boundary if something
2997  // is a clear good pick on one boundary. Skip heuristics that are more
2998  // "tie-breaking" in nature.
2999  bool SameBoundary = Zone != nullptr;
3000  if (SameBoundary) {
3001  // For loops that are acyclic path limited, aggressively schedule for
3002  // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3003  // heuristics to take precedence.
3004  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3005  tryLatency(TryCand, Cand, *Zone))
3006  return;
3007 
3008  // Prioritize instructions that read unbuffered resources by stall cycles.
3009  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3010  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3011  return;
3012  }
3013 
3014  // Keep clustered nodes together to encourage downstream peephole
3015  // optimizations which may reduce resource requirements.
3016  //
3017  // This is a best effort to set things up for a post-RA pass. Optimizations
3018  // like generating loads of multiple registers should ideally be done within
3019  // the scheduler pass by combining the loads during DAG postprocessing.
3020  const SUnit *CandNextClusterSU =
3021  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3022  const SUnit *TryCandNextClusterSU =
3023  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3024  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3025  Cand.SU == CandNextClusterSU,
3026  TryCand, Cand, Cluster))
3027  return;
3028 
3029  if (SameBoundary) {
3030  // Weak edges are for clustering and other constraints.
3031  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3032  getWeakLeft(Cand.SU, Cand.AtTop),
3033  TryCand, Cand, Weak))
3034  return;
3035  }
3036 
3037  // Avoid increasing the max pressure of the entire region.
3038  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3039  Cand.RPDelta.CurrentMax,
3040  TryCand, Cand, RegMax, TRI,
3041  DAG->MF))
3042  return;
3043 
3044  if (SameBoundary) {
3045  // Avoid critical resource consumption and balance the schedule.
3046  TryCand.initResourceDelta(DAG, SchedModel);
3048  TryCand, Cand, ResourceReduce))
3049  return;
3052  TryCand, Cand, ResourceDemand))
3053  return;
3054 
3055  // Avoid serializing long latency dependence chains.
3056  // For acyclic path limited loops, latency was already checked above.
3057  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3058  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3059  return;
3060 
3061  // Fall through to original instruction order.
3062  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3063  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3064  TryCand.Reason = NodeOrder;
3065  }
3066  }
3067 }
3068 
3069 /// Pick the best candidate from the queue.
3070 ///
3071 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3072 /// DAG building. To adjust for the current scheduling location we need to
3073 /// maintain the number of vreg uses remaining to be top-scheduled.
3075  const CandPolicy &ZonePolicy,
3076  const RegPressureTracker &RPTracker,
3077  SchedCandidate &Cand) {
3078  // getMaxPressureDelta temporarily modifies the tracker.
3079  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3080 
3081  ReadyQueue &Q = Zone.Available;
3082  for (SUnit *SU : Q) {
3083 
3084  SchedCandidate TryCand(ZonePolicy);
3085  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3086  // Pass SchedBoundary only when comparing nodes from the same boundary.
3087  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3088  tryCandidate(Cand, TryCand, ZoneArg);
3089  if (TryCand.Reason != NoCand) {
3090  // Initialize resource delta if needed in case future heuristics query it.
3091  if (TryCand.ResDelta == SchedResourceDelta())
3092  TryCand.initResourceDelta(DAG, SchedModel);
3093  Cand.setBest(TryCand);
3094  LLVM_DEBUG(traceCandidate(Cand));
3095  }
3096  }
3097 }
3098 
3099 /// Pick the best candidate node from either the top or bottom queue.
3101  // Schedule as far as possible in the direction of no choice. This is most
3102  // efficient, but also provides the best heuristics for CriticalPSets.
3103  if (SUnit *SU = Bot.pickOnlyChoice()) {
3104  IsTopNode = false;
3105  tracePick(Only1, false);
3106  return SU;
3107  }
3108  if (SUnit *SU = Top.pickOnlyChoice()) {
3109  IsTopNode = true;
3110  tracePick(Only1, true);
3111  return SU;
3112  }
3113  // Set the bottom-up policy based on the state of the current bottom zone and
3114  // the instructions outside the zone, including the top zone.
3115  CandPolicy BotPolicy;
3116  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3117  // Set the top-down policy based on the state of the current top zone and
3118  // the instructions outside the zone, including the bottom zone.
3119  CandPolicy TopPolicy;
3120  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3121 
3122  // See if BotCand is still valid (because we previously scheduled from Top).
3123  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3124  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3125  BotCand.Policy != BotPolicy) {
3126  BotCand.reset(CandPolicy());
3127  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3128  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3129  } else {
3130  LLVM_DEBUG(traceCandidate(BotCand));
3131 #ifndef NDEBUG
3132  if (VerifyScheduling) {
3133  SchedCandidate TCand;
3134  TCand.reset(CandPolicy());
3135  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3136  assert(TCand.SU == BotCand.SU &&
3137  "Last pick result should correspond to re-picking right now");
3138  }
3139 #endif
3140  }
3141 
3142  // Check if the top Q has a better candidate.
3143  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3144  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3145  TopCand.Policy != TopPolicy) {
3146  TopCand.reset(CandPolicy());
3147  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3148  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3149  } else {
3150  LLVM_DEBUG(traceCandidate(TopCand));
3151 #ifndef NDEBUG
3152  if (VerifyScheduling) {
3153  SchedCandidate TCand;
3154  TCand.reset(CandPolicy());
3155  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3156  assert(TCand.SU == TopCand.SU &&
3157  "Last pick result should correspond to re-picking right now");
3158  }
3159 #endif
3160  }
3161 
3162  // Pick best from BotCand and TopCand.
3163  assert(BotCand.isValid());
3164  assert(TopCand.isValid());
3165  SchedCandidate Cand = BotCand;
3166  TopCand.Reason = NoCand;
3167  tryCandidate(Cand, TopCand, nullptr);
3168  if (TopCand.Reason != NoCand) {
3169  Cand.setBest(TopCand);
3170  LLVM_DEBUG(traceCandidate(Cand));
3171  }
3172 
3173  IsTopNode = Cand.AtTop;
3174  tracePick(Cand);
3175  return Cand.SU;
3176 }
3177 
3178 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3180  if (DAG->top() == DAG->bottom()) {
3181  assert(Top.Available.empty() && Top.Pending.empty() &&
3182  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3183  return nullptr;
3184  }
3185  SUnit *SU;
3186  do {
3187  if (RegionPolicy.OnlyTopDown) {
3188  SU = Top.pickOnlyChoice();
3189  if (!SU) {
3190  CandPolicy NoPolicy;
3191  TopCand.reset(NoPolicy);
3192  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3193  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3194  tracePick(TopCand);
3195  SU = TopCand.SU;
3196  }
3197  IsTopNode = true;
3198  } else if (RegionPolicy.OnlyBottomUp) {
3199  SU = Bot.pickOnlyChoice();
3200  if (!SU) {
3201  CandPolicy NoPolicy;
3202  BotCand.reset(NoPolicy);
3203  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3204  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3205  tracePick(BotCand);
3206  SU = BotCand.SU;
3207  }
3208  IsTopNode = false;
3209  } else {
3210  SU = pickNodeBidirectional(IsTopNode);
3211  }
3212  } while (SU->isScheduled);
3213 
3214  if (SU->isTopReady())
3215  Top.removeReady(SU);
3216  if (SU->isBottomReady())
3217  Bot.removeReady(SU);
3218 
3219  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3220  << *SU->getInstr());
3221  return SU;
3222 }
3223 
3225  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3226  if (!isTop)
3227  ++InsertPos;
3228  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3229 
3230  // Find already scheduled copies with a single physreg dependence and move
3231  // them just above the scheduled instruction.
3232  for (SDep &Dep : Deps) {
3233  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3234  continue;
3235  SUnit *DepSU = Dep.getSUnit();
3236  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3237  continue;
3238  MachineInstr *Copy = DepSU->getInstr();
3239  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3240  continue;
3241  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3242  DAG->dumpNode(*Dep.getSUnit()));
3243  DAG->moveInstruction(Copy, InsertPos);
3244  }
3245 }
3246 
3247 /// Update the scheduler's state after scheduling a node. This is the same node
3248 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3249 /// update it's state based on the current cycle before MachineSchedStrategy
3250 /// does.
3251 ///
3252 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3253 /// them here. See comments in biasPhysReg.
3254 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3255  if (IsTopNode) {
3256  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3257  Top.bumpNode(SU);
3258  if (SU->hasPhysRegUses)
3259  reschedulePhysReg(SU, true);
3260  } else {
3261  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3262  Bot.bumpNode(SU);
3263  if (SU->hasPhysRegDefs)
3264  reschedulePhysReg(SU, false);
3265  }
3266 }
3267 
3268 /// Create the standard converging machine scheduler. This will be used as the
3269 /// default scheduler if the target does not set a default.
3271  ScheduleDAGMILive *DAG =
3272  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3273  // Register DAG post-processors.
3274  //
3275  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3276  // data and pass it to later mutations. Have a single mutation that gathers
3277  // the interesting nodes in one pass.
3279  return DAG;
3280 }
3281 
3283  return createGenericSchedLive(C);
3284 }
3285 
3286 static MachineSchedRegistry
3287 GenericSchedRegistry("converge", "Standard converging scheduler.",
3289 
3290 //===----------------------------------------------------------------------===//
3291 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3292 //===----------------------------------------------------------------------===//
3293 
3295  DAG = Dag;
3296  SchedModel = DAG->getSchedModel();
3297  TRI = DAG->TRI;
3298 
3299  Rem.init(DAG, SchedModel);
3300  Top.init(DAG, SchedModel, &Rem);
3301  BotRoots.clear();
3302 
3303  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3304  // or are disabled, then these HazardRecs will be disabled.
3305  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3306  if (!Top.HazardRec) {
3307  Top.HazardRec =
3309  Itin, DAG);
3310  }
3311 }
3312 
3314  Rem.CriticalPath = DAG->ExitSU.getDepth();
3315 
3316  // Some roots may not feed into ExitSU. Check all of them in case.
3317  for (const SUnit *SU : BotRoots) {
3318  if (SU->getDepth() > Rem.CriticalPath)
3319  Rem.CriticalPath = SU->getDepth();
3320  }
3321  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3322  if (DumpCriticalPathLength) {
3323  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3324  }
3325 }
3326 
3327 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3328 ///
3329 /// \param Cand provides the policy and current best candidate.
3330 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3332  SchedCandidate &TryCand) {
3333  // Initialize the candidate if needed.
3334  if (!Cand.isValid()) {
3335  TryCand.Reason = NodeOrder;
3336  return;
3337  }
3338 
3339  // Prioritize instructions that read unbuffered resources by stall cycles.
3340  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3341  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3342  return;
3343 
3344  // Keep clustered nodes together.
3345  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3346  Cand.SU == DAG->getNextClusterSucc(),
3347  TryCand, Cand, Cluster))
3348  return;
3349 
3350  // Avoid critical resource consumption and balance the schedule.
3351  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3352  TryCand, Cand, ResourceReduce))
3353  return;
3355  Cand.ResDelta.DemandedResources,
3356  TryCand, Cand, ResourceDemand))
3357  return;
3358 
3359  // Avoid serializing long latency dependence chains.
3360  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3361  return;
3362  }
3363 
3364  // Fall through to original instruction order.
3365  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3366  TryCand.Reason = NodeOrder;
3367 }
3368 
3370  ReadyQueue &Q = Top.Available;
3371  for (SUnit *SU : Q) {
3372  SchedCandidate TryCand(Cand.Policy);
3373  TryCand.SU = SU;
3374  TryCand.AtTop = true;
3375  TryCand.initResourceDelta(DAG, SchedModel);
3376  tryCandidate(Cand, TryCand);
3377  if (TryCand.Reason != NoCand) {
3378  Cand.setBest(TryCand);
3379  LLVM_DEBUG(traceCandidate(Cand));
3380  }
3381  }
3382 }
3383 
3384 /// Pick the next node to schedule.
3386  if (DAG->top() == DAG->bottom()) {
3387  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3388  return nullptr;
3389  }
3390  SUnit *SU;
3391  do {
3392  SU = Top.pickOnlyChoice();
3393  if (SU) {
3394  tracePick(Only1, true);
3395  } else {
3396  CandPolicy NoPolicy;
3397  SchedCandidate TopCand(NoPolicy);
3398  // Set the top-down policy based on the state of the current top zone and
3399  // the instructions outside the zone, including the bottom zone.
3400  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3401  pickNodeFromQueue(TopCand);
3402  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3403  tracePick(TopCand);
3404  SU = TopCand.SU;
3405  }
3406  } while (SU->isScheduled);
3407 
3408  IsTopNode = true;
3409  Top.removeReady(SU);
3410 
3411  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3412  << *SU->getInstr());
3413  return SU;
3414 }
3415 
3416 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3417 /// scheduled/remaining flags in the DAG nodes.
3418 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3419  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3420  Top.bumpNode(SU);
3421 }
3422 
3424  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3425  /*RemoveKillFlags=*/true);
3426 }
3427 
3428 //===----------------------------------------------------------------------===//
3429 // ILP Scheduler. Currently for experimental analysis of heuristics.
3430 //===----------------------------------------------------------------------===//
3431 
3432 namespace {
3433 
3434 /// Order nodes by the ILP metric.
3435 struct ILPOrder {
3436  const SchedDFSResult *DFSResult = nullptr;
3437  const BitVector *ScheduledTrees = nullptr;
3438  bool MaximizeILP;
3439 
3440  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3441 
3442  /// Apply a less-than relation on node priority.
3443  ///
3444  /// (Return true if A comes after B in the Q.)
3445  bool operator()(const SUnit *A, const SUnit *B) const {
3446  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3447  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3448  if (SchedTreeA != SchedTreeB) {
3449  // Unscheduled trees have lower priority.
3450  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3451  return ScheduledTrees->test(SchedTreeB);
3452 
3453  // Trees with shallower connections have have lower priority.
3454  if (DFSResult->getSubtreeLevel(SchedTreeA)
3455  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3456  return DFSResult->getSubtreeLevel(SchedTreeA)
3457  < DFSResult->getSubtreeLevel(SchedTreeB);
3458  }
3459  }
3460  if (MaximizeILP)
3461  return DFSResult->getILP(A) < DFSResult->getILP(B);
3462  else
3463  return DFSResult->getILP(A) > DFSResult->getILP(B);
3464  }
3465 };
3466 
3467 /// Schedule based on the ILP metric.
3468 class ILPScheduler : public MachineSchedStrategy {
3469  ScheduleDAGMILive *DAG = nullptr;
3470  ILPOrder Cmp;
3471 
3472  std::vector<SUnit*> ReadyQ;
3473 
3474 public:
3475  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3476 
3477  void initialize(ScheduleDAGMI *dag) override {
3478  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3479  DAG = static_cast<ScheduleDAGMILive*>(dag);
3480  DAG->computeDFSResult();
3481  Cmp.DFSResult = DAG->getDFSResult();
3482  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3483  ReadyQ.clear();
3484  }
3485 
3486  void registerRoots() override {
3487  // Restore the heap in ReadyQ with the updated DFS results.
3488  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3489  }
3490 
3491  /// Implement MachineSchedStrategy interface.
3492  /// -----------------------------------------
3493 
3494  /// Callback to select the highest priority node from the ready Q.
3495  SUnit *pickNode(bool &IsTopNode) override {
3496  if (ReadyQ.empty()) return nullptr;
3497  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3498  SUnit *SU = ReadyQ.back();
3499  ReadyQ.pop_back();
3500  IsTopNode = false;
3501  LLVM_DEBUG(dbgs() << "Pick node "
3502  << "SU(" << SU->NodeNum << ") "
3503  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3504  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3505  << " @"
3506  << DAG->getDFSResult()->getSubtreeLevel(
3507  DAG->getDFSResult()->getSubtreeID(SU))
3508  << '\n'
3509  << "Scheduling " << *SU->getInstr());
3510  return SU;
3511  }
3512 
3513  /// Scheduler callback to notify that a new subtree is scheduled.
3514  void scheduleTree(unsigned SubtreeID) override {
3515  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3516  }
3517 
3518  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3519  /// DFSResults, and resort the priority Q.
3520  void schedNode(SUnit *SU, bool IsTopNode) override {
3521  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3522  }
3523 
3524  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3525 
3526  void releaseBottomNode(SUnit *SU) override {
3527  ReadyQ.push_back(SU);
3528  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3529  }
3530 };
3531 
3532 } // end anonymous namespace
3533 
3535  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3536 }
3538  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3539 }
3540 
3542  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3544  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3545 
3546 //===----------------------------------------------------------------------===//
3547 // Machine Instruction Shuffler for Correctness Testing
3548 //===----------------------------------------------------------------------===//
3549 
3550 #ifndef NDEBUG
3551 namespace {
3552 
3553 /// Apply a less-than relation on the node order, which corresponds to the
3554 /// instruction order prior to scheduling. IsReverse implements greater-than.
3555 template<bool IsReverse>
3556 struct SUnitOrder {
3557  bool operator()(SUnit *A, SUnit *B) const {
3558  if (IsReverse)
3559  return A->NodeNum > B->NodeNum;
3560  else
3561  return A->NodeNum < B->NodeNum;
3562  }
3563 };
3564 
3565 /// Reorder instructions as much as possible.
3566 class InstructionShuffler : public MachineSchedStrategy {
3567  bool IsAlternating;
3568  bool IsTopDown;
3569 
3570  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3571  // gives nodes with a higher number higher priority causing the latest
3572  // instructions to be scheduled first.
3573  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3574  TopQ;
3575 
3576  // When scheduling bottom-up, use greater-than as the queue priority.
3578  BottomQ;
3579 
3580 public:
3581  InstructionShuffler(bool alternate, bool topdown)
3582  : IsAlternating(alternate), IsTopDown(topdown) {}
3583 
3584  void initialize(ScheduleDAGMI*) override {
3585  TopQ.clear();
3586  BottomQ.clear();
3587  }
3588 
3589  /// Implement MachineSchedStrategy interface.
3590  /// -----------------------------------------
3591 
3592  SUnit *pickNode(bool &IsTopNode) override {
3593  SUnit *SU;
3594  if (IsTopDown) {
3595  do {
3596  if (TopQ.empty()) return nullptr;
3597  SU = TopQ.top();
3598  TopQ.pop();
3599  } while (SU->isScheduled);
3600  IsTopNode = true;
3601  } else {
3602  do {
3603  if (BottomQ.empty()) return nullptr;
3604  SU = BottomQ.top();
3605  BottomQ.pop();
3606  } while (SU->isScheduled);
3607  IsTopNode = false;
3608  }
3609  if (IsAlternating)
3610  IsTopDown = !IsTopDown;
3611  return SU;
3612  }
3613 
3614  void schedNode(SUnit *SU, bool IsTopNode) override {}
3615 
3616  void releaseTopNode(SUnit *SU) override {
3617  TopQ.push(SU);
3618  }
3619  void releaseBottomNode(SUnit *SU) override {
3620  BottomQ.push(SU);
3621  }
3622 };
3623 
3624 } // end anonymous namespace
3625 
3627  bool Alternate = !ForceTopDown && !ForceBottomUp;
3628  bool TopDown = !ForceBottomUp;
3629  assert((TopDown || !ForceTopDown) &&
3630  "-misched-topdown incompatible with -misched-bottomup");
3631  return new ScheduleDAGMILive(
3632  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3633 }
3634 
3636  "shuffle", "Shuffle machine instructions alternating directions",
3638 #endif // !NDEBUG
3639 
3640 //===----------------------------------------------------------------------===//
3641 // GraphWriter support for ScheduleDAGMILive.
3642 //===----------------------------------------------------------------------===//
3643 
3644 #ifndef NDEBUG
3645 namespace llvm {
3646 
3647 template<> struct GraphTraits<
3649 
3650 template<>
3653 
3654  static std::string getGraphName(const ScheduleDAG *G) {
3655  return G->MF.getName();
3656  }
3657 
3658  static bool renderGraphFromBottomUp() {
3659  return true;
3660  }
3661 
3662  static bool isNodeHidden(const SUnit *Node) {
3663  if (ViewMISchedCutoff == 0)
3664  return false;
3665  return (Node->Preds.size() > ViewMISchedCutoff
3666  || Node->Succs.size() > ViewMISchedCutoff);
3667  }
3668 
3669  /// If you want to override the dot attributes printed for a particular
3670  /// edge, override this method.
3671  static std::string getEdgeAttributes(const SUnit *Node,
3672  SUnitIterator EI,
3673  const ScheduleDAG *Graph) {
3674  if (EI.isArtificialDep())
3675  return "color=cyan,style=dashed";
3676  if (EI.isCtrlDep())
3677  return "color=blue,style=dashed";
3678  return "";
3679  }
3680 
3681  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3682  std::string Str;
3683  raw_string_ostream SS(Str);
3684  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3685  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3686  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3687  SS << "SU:" << SU->NodeNum;
3688  if (DFS)
3689  SS << " I:" << DFS->getNumInstrs(SU);
3690  return SS.str();
3691  }
3692 
3693  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3694  return G->getGraphNodeLabel(SU);
3695  }
3696 
3697  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3698  std::string Str("shape=Mrecord");
3699  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3700  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3701  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3702  if (DFS) {
3703  Str += ",style=filled,fillcolor=\"#";
3704  Str += DOT::getColorString(DFS->getSubtreeID(N));
3705  Str += '"';
3706  }
3707  return Str;
3708  }
3709 };
3710 
3711 } // end namespace llvm
3712 #endif // NDEBUG
3713 
3714 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3715 /// rendered using 'dot'.
3716 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3717 #ifndef NDEBUG
3718  ViewGraph(this, Name, false, Title);
3719 #else
3720  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3721  << "systems with Graphviz or gv!\n";
3722 #endif // NDEBUG
3723 }
3724 
3725 /// Out-of-line implementation with no arguments is handy for gdb.
3727  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3728 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:118
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:77
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:258
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:249
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:464
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Segments::iterator iterator
Definition: LiveInterval.h:207
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:637
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned getReg() const
getReg - Returns the register number.
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:139
bool test(unsigned Idx) const
Definition: BitVector.h:501
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:449
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:699
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:366
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const RegPressureTracker & getBotRPTracker() const
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:269
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:211
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Result of a LiveRange query.
Definition: LiveInterval.h:89
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1185
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
PressureDiff & getPressureDiff(const SUnit *SU)
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:328
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:271
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:268
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:446
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:104
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:528
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:819
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:379
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Helpers for implementing custom MachineSchedStrategy classes.
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:200
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
static const unsigned MinSubtreeSize
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:383
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:480
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
Definition: MachineInstr.h:998
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:498
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:270
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:298
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:210
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
LiveInterval & getInterval(unsigned Reg)
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:416
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:140
Representation of each machine instruction.
Definition: MachineInstr.h:63
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:652
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
iterator begin()
Definition: LiveInterval.h:210
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:806
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:325
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:372
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:505
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Machine Instruction Scheduler
IRTranslator LLVM IR MI
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register ...
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:655
void resize(size_type N)
Definition: SmallVector.h:343