LLVM 8.0.0svn
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/MachineScheduler.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph, provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
140 MachineSchedContext::MachineSchedContext() {
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
144 MachineSchedContext::~MachineSchedContext() {
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
196 char &llvm::MachineSchedulerID = MachineScheduler::ID;
197 
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205  "Machine Instruction Scheduler", false, false)
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
208  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
213  AU.addRequiredID(MachineDominatorsID);
214  AU.addRequired<MachineLoopInfo>();
215  AU.addRequired<AAResultsWrapperPass>();
216  AU.addRequired<TargetPassConfig>();
217  AU.addRequired<SlotIndexes>();
218  AU.addPreserved<SlotIndexes>();
219  AU.addRequired<LiveIntervals>();
220  AU.addPreserved<LiveIntervals>();
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
226 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
227 
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
232  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
237  AU.addRequiredID(MachineDominatorsID);
238  AU.addRequired<MachineLoopInfo>();
239  AU.addRequired<TargetPassConfig>();
240  MachineFunctionPass::getAnalysisUsage(AU);
241 }
242 
244 
245 /// A dummy default scheduler factory indicates whether the scheduler
246 /// is overridden on the command line.
247 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
248  return nullptr;
249 }
250 
251 /// MachineSchedOpt allows command line selection of the scheduler.
252 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
253  RegisterPassParser<MachineSchedRegistry>>
254 MachineSchedOpt("misched",
255  cl::init(&useDefaultMachineSched), cl::Hidden,
256  cl::desc("Machine instruction scheduler to use"));
257 
258 static MachineSchedRegistry
259 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
260  useDefaultMachineSched);
261 
262 static cl::opt<bool> EnableMachineSched(
263  "enable-misched",
264  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
265  cl::Hidden);
266 
267 static cl::opt<bool> EnablePostRAMachineSched(
268  "enable-post-misched",
269  cl::desc("Enable the post-ra machine instruction scheduling pass."),
270  cl::init(true), cl::Hidden);
271 
272 /// Decrement this iterator until reaching the top or a non-debug instr.
273 static MachineBasicBlock::const_iterator
274 priorNonDebug(MachineBasicBlock::const_iterator I,
275  MachineBasicBlock::const_iterator Beg) {
276  assert(I != Beg && "reached the top of the region, cannot decrement");
277  while (--I != Beg) {
278  if (!I->isDebugInstr())
279  break;
280  }
281  return I;
282 }
283 
284 /// Non-const version.
285 static MachineBasicBlock::iterator
286 priorNonDebug(MachineBasicBlock::iterator I,
287  MachineBasicBlock::iterator Beg) {
288  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
289  .getNonConstIterator();
290 }
291 
292 /// If this iterator is a debug value, increment until reaching the End or a
293 /// non-debug instruction.
294 static MachineBasicBlock::const_iterator
295 nextIfDebug(MachineBasicBlock::const_iterator I,
296  MachineBasicBlock::const_iterator End) {
297  for(; I != End; ++I) {
298  if (!I->isDebugInstr())
299  break;
300  }
301  return I;
302 }
303 
304 /// Non-const version.
305 static MachineBasicBlock::iterator
306 nextIfDebug(MachineBasicBlock::iterator I,
307  MachineBasicBlock::iterator End) {
308  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
309  .getNonConstIterator();
310 }
311 
312 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
313 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
314  // Select the scheduler, or set the default.
315  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
316  if (Ctor != useDefaultMachineSched)
317  return Ctor(this);
318 
319  // Get the default scheduler set by the target for this function.
320  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
321  if (Scheduler)
322  return Scheduler;
323 
324  // Default to GenericScheduler.
325  return createGenericSchedLive(this);
326 }
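// Minimal sketch (not part of this file) of how an out-of-tree strategy could
// be made selectable through the "misched" option above; MySchedStrategy and
// createMySched are hypothetical names:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<MySchedStrategy>());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Custom scheduling strategy", createMySched);
//
// With that registration in place, -misched=my-sched overrides both the target
// default and GenericScheduler.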
327 
328 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
329 /// the caller. We don't have a command line option to override the postRA
330 /// scheduler. The Target must configure it.
331 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
332  // Get the postRA scheduler set by the target for this function.
333  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
334  if (Scheduler)
335  return Scheduler;
336 
337  // Default to GenericScheduler.
338  return createGenericSchedPostRA(this);
339 }
340 
341 /// Top-level MachineScheduler pass driver.
342 ///
343 /// Visit blocks in function order. Divide each block into scheduling regions
344 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
345 /// consistent with the DAG builder, which traverses the interior of the
346 /// scheduling regions bottom-up.
347 ///
348 /// This design avoids exposing scheduling boundaries to the DAG builder,
349 /// simplifying the DAG builder's support for "special" target instructions.
350 /// At the same time the design allows target schedulers to operate across
351 /// scheduling boundaries, for example to bundle the boundary instructions
352 /// without reordering them. This creates complexity, because the target
353 /// scheduler must update the RegionBegin and RegionEnd positions cached by
354 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
355 /// design would be to split blocks at scheduling boundaries, but LLVM has a
356 /// general bias against block splitting purely for implementation simplicity.
357 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
358  if (skipFunction(mf.getFunction()))
359  return false;
360 
361  if (EnableMachineSched.getNumOccurrences()) {
362  if (!EnableMachineSched)
363  return false;
364  } else if (!mf.getSubtarget().enableMachineScheduler())
365  return false;
366 
367  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
368 
369  // Initialize the context of the pass.
370  MF = &mf;
371  MLI = &getAnalysis<MachineLoopInfo>();
372  MDT = &getAnalysis<MachineDominatorTree>();
373  PassConfig = &getAnalysis<TargetPassConfig>();
374  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
375 
376  LIS = &getAnalysis<LiveIntervals>();
377 
378  if (VerifyScheduling) {
379  LLVM_DEBUG(LIS->dump());
380  MF->verify(this, "Before machine scheduling.");
381  }
382  RegClassInfo->runOnMachineFunction(*MF);
383 
384  // Instantiate the selected scheduler for this target, function, and
385  // optimization level.
386  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
387  scheduleRegions(*Scheduler, false);
388 
389  LLVM_DEBUG(LIS->dump());
390  if (VerifyScheduling)
391  MF->verify(this, "After machine scheduling.");
392  return true;
393 }
394 
395 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
396  if (skipFunction(mf.getFunction()))
397  return false;
398 
399  if (EnablePostRAMachineSched.getNumOccurrences()) {
400  if (!EnablePostRAMachineSched)
401  return false;
402  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
403  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
404  return false;
405  }
406  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
407 
408  // Initialize the context of the pass.
409  MF = &mf;
410  MLI = &getAnalysis<MachineLoopInfo>();
411  PassConfig = &getAnalysis<TargetPassConfig>();
412 
413  if (VerifyScheduling)
414  MF->verify(this, "Before post machine scheduling.");
415 
416  // Instantiate the selected scheduler for this target, function, and
417  // optimization level.
418  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
419  scheduleRegions(*Scheduler, true);
420 
421  if (VerifyScheduling)
422  MF->verify(this, "After post machine scheduling.");
423  return true;
424 }
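// Note on the flag logic above: when -enable-post-misched is passed explicitly
// (getNumOccurrences() != 0), the flag alone decides whether the pass runs, so
// a subtarget that returns false from enablePostRAScheduler() can still be
// pushed through post-RA machine scheduling, e.g.:
//   llc -enable-post-misched foo.ll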
425 
426 /// Return true if the given instruction should not be included in a scheduling
427 /// region.
428 ///
429 /// MachineScheduler does not currently support scheduling across calls. To
430 /// handle calls, the DAG builder needs to be modified to create register
431 /// anti/output dependencies on the registers clobbered by the call's regmask
432 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
433 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
434 /// the boundary, but there would be no benefit to postRA scheduling across
435 /// calls this late anyway.
436 static bool isSchedBoundary(MachineInstr *MI,
437  MachineBasicBlock *MBB,
438  MachineFunction *MF,
439  const TargetInstrInfo *TII) {
440  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
441 }
442 
443 /// A region of an MBB for scheduling.
444 namespace {
445 struct SchedRegion {
446  /// RegionBegin is the first instruction in the scheduling region, and
447  /// RegionEnd is either MBB->end() or the scheduling boundary after the
448  /// last instruction in the scheduling region. These iterators cannot refer
449  /// to instructions outside of the identified scheduling region because
450  /// those may be reordered before scheduling this region.
451  MachineBasicBlock::iterator RegionBegin;
452  MachineBasicBlock::iterator RegionEnd;
453  unsigned NumRegionInstrs;
454 
455  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
456  unsigned N) :
457  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
458 };
459 } // end anonymous namespace
460 
461 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
462 
463 static void
464 getSchedRegions(MachineBasicBlock *MBB,
465  MBBRegionsVector &Regions,
466  bool RegionsTopDown) {
467  MachineFunction *MF = MBB->getParent();
468  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
469 
470  MachineBasicBlock::iterator I = nullptr;
471  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
472  RegionEnd != MBB->begin(); RegionEnd = I) {
473 
474  // Avoid decrementing RegionEnd for blocks with no terminator.
475  if (RegionEnd != MBB->end() ||
476  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
477  --RegionEnd;
478  }
479 
480  // The next region starts above the previous region. Look backward in the
481  // instruction stream until we find the nearest boundary.
482  unsigned NumRegionInstrs = 0;
483  I = RegionEnd;
484  for (;I != MBB->begin(); --I) {
485  MachineInstr &MI = *std::prev(I);
486  if (isSchedBoundary(&MI, &*MBB, MF, TII))
487  break;
488  if (!MI.isDebugInstr())
489  // MBB::size() uses instr_iterator to count. Here we need a bundle to
490  // count as a single instruction.
491  ++NumRegionInstrs;
492  }
493 
494  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
495  }
496 
497  if (RegionsTopDown)
498  std::reverse(Regions.begin(), Regions.end());
499 }
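// Illustrative example (not from the source): in a block of the form
//   %a = ...
//   %b = ...
//   CALL @f
//   %c = ...
// the call is a scheduling boundary (see isSchedBoundary above), so two
// regions are recorded: [%a, %b] and [%c]. Regions are collected from the
// bottom of the block upward and reversed only when the scheduler requests
// top-down region order.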
500 
501 /// Main driver for both MachineScheduler and PostMachineScheduler.
502 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
503  bool FixKillFlags) {
504  // Visit all machine basic blocks.
505  //
506  // TODO: Visit blocks in global postorder or postorder within the bottom-up
507  // loop tree. Then we can optionally compute global RegPressure.
508  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
509  MBB != MBBEnd; ++MBB) {
510 
511  Scheduler.startBlock(&*MBB);
512 
513 #ifndef NDEBUG
514  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
515  continue;
516  if (SchedOnlyBlock.getNumOccurrences()
517  && (int)SchedOnlyBlock != MBB->getNumber())
518  continue;
519 #endif
520 
521  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
522  // points to the scheduling boundary at the bottom of the region. The DAG
523  // does not include RegionEnd, but the region does (i.e. the next
524  // RegionEnd is above the previous RegionBegin). If the current block has
525  // no terminator then RegionEnd == MBB->end() for the bottom region.
526  //
527  // All the regions of MBB are first found and stored in MBBRegions, which
528  // will be processed (MBB) top-down if initialized with true.
529  //
530  // The Scheduler may insert instructions during either schedule() or
531  // exitRegion(), even for empty regions. So the local iterators 'I' and
532  // 'RegionEnd' are invalid across these calls. Instructions must not be
533  // added to other regions than the current one without updating MBBRegions.
534 
535  MBBRegionsVector MBBRegions;
536  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
537  for (MBBRegionsVector::iterator R = MBBRegions.begin();
538  R != MBBRegions.end(); ++R) {
539  MachineBasicBlock::iterator I = R->RegionBegin;
540  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
541  unsigned NumRegionInstrs = R->NumRegionInstrs;
542 
543  // Notify the scheduler of the region, even if we may skip scheduling
544  // it. Perhaps it still needs to be bundled.
545  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
546 
547  // Skip empty scheduling regions (0 or 1 schedulable instructions).
548  if (I == RegionEnd || I == std::prev(RegionEnd)) {
549  // Close the current region. Bundle the terminator if needed.
550  // This invalidates 'RegionEnd' and 'I'.
551  Scheduler.exitRegion();
552  continue;
553  }
554  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
555  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
556  << " " << MBB->getName() << "\n From: " << *I
557  << " To: ";
558  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
559  else dbgs() << "End";
560  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
561  if (DumpCriticalPathLength) {
562  errs() << MF->getName();
563  errs() << ":%bb. " << MBB->getNumber();
564  errs() << " " << MBB->getName() << " \n";
565  }
566 
567  // Schedule a region: possibly reorder instructions.
568  // This invalidates the original region iterators.
569  Scheduler.schedule();
570 
571  // Close the current region.
572  Scheduler.exitRegion();
573  }
574  Scheduler.finishBlock();
575  // FIXME: Ideally, no further passes should rely on kill flags. However,
576  // thumb2 size reduction is currently an exception, so the PostMIScheduler
577  // needs to do this.
578  if (FixKillFlags)
579  Scheduler.fixupKills(*MBB);
580  }
581  Scheduler.finalizeSchedule();
582 }
583 
584 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
585  // unimplemented
586 }
587 
588 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
589 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
590  dbgs() << "Queue " << Name << ": ";
591  for (const SUnit *SU : Queue)
592  dbgs() << SU->NodeNum << " ";
593  dbgs() << "\n";
594 }
595 #endif
596 
597 //===----------------------------------------------------------------------===//
598 // ScheduleDAGMI - Basic machine instruction scheduling. This is
599 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
600 // virtual registers.
601 //===----------------------------------------------------------------------===//
602 
603 // Provide a vtable anchor.
604 ScheduleDAGMI::~ScheduleDAGMI() = default;
605 
606 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
607  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
608 }
609 
610 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
611  if (SuccSU != &ExitSU) {
612  // Do not use WillCreateCycle, it assumes SD scheduling.
613  // If Pred is reachable from Succ, then the edge creates a cycle.
614  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
615  return false;
616  Topo.AddPred(SuccSU, PredDep.getSUnit());
617  }
618  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
619  // Return true regardless of whether a new edge needed to be inserted.
620  return true;
621 }
622 
623 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
624 /// NumPredsLeft reaches zero, release the successor node.
625 ///
626 /// FIXME: Adjust SuccSU height based on MinLatency.
627 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
628  SUnit *SuccSU = SuccEdge->getSUnit();
629 
630  if (SuccEdge->isWeak()) {
631  --SuccSU->WeakPredsLeft;
632  if (SuccEdge->isCluster())
633  NextClusterSucc = SuccSU;
634  return;
635  }
636 #ifndef NDEBUG
637  if (SuccSU->NumPredsLeft == 0) {
638  dbgs() << "*** Scheduling failed! ***\n";
639  dumpNode(*SuccSU);
640  dbgs() << " has been released too many times!\n";
641  llvm_unreachable(nullptr);
642  }
643 #endif
644  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
645  // CurrCycle may have advanced since then.
646  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
647  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
648 
649  --SuccSU->NumPredsLeft;
650  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
651  SchedImpl->releaseTopNode(SuccSU);
652 }
653 
654 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
655 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
656  for (SDep &Succ : SU->Succs)
657  releaseSucc(SU, &Succ);
658 }
659 
660 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
661 /// NumSuccsLeft reaches zero, release the predecessor node.
662 ///
663 /// FIXME: Adjust PredSU height based on MinLatency.
664 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
665  SUnit *PredSU = PredEdge->getSUnit();
666 
667  if (PredEdge->isWeak()) {
668  --PredSU->WeakSuccsLeft;
669  if (PredEdge->isCluster())
670  NextClusterPred = PredSU;
671  return;
672  }
673 #ifndef NDEBUG
674  if (PredSU->NumSuccsLeft == 0) {
675  dbgs() << "*** Scheduling failed! ***\n";
676  dumpNode(*PredSU);
677  dbgs() << " has been released too many times!\n";
678  llvm_unreachable(nullptr);
679  }
680 #endif
681  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
682  // CurrCycle may have advanced since then.
683  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
684  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
685 
686  --PredSU->NumSuccsLeft;
687  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
688  SchedImpl->releaseBottomNode(PredSU);
689 }
690 
691 /// releasePredecessors - Call releasePred on each of SU's predecessors.
692 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
693  for (SDep &Pred : SU->Preds)
694  releasePred(SU, &Pred);
695 }
696 
697 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
698  ScheduleDAGInstrs::startBlock(bb);
699  SchedImpl->enterMBB(bb);
700 }
701 
702 void ScheduleDAGMI::finishBlock() {
703  SchedImpl->leaveMBB();
704  ScheduleDAGInstrs::finishBlock();
705 }
706 
707 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
708 /// crossing a scheduling boundary. [begin, end) includes all instructions in
709 /// the region, including the boundary itself and single-instruction regions
710 /// that don't get scheduled.
711 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
712  MachineBasicBlock::iterator begin,
713  MachineBasicBlock::iterator end,
714  unsigned regioninstrs)
715 {
716  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
717 
718  SchedImpl->initPolicy(begin, end, regioninstrs);
719 }
720 
721 /// This is normally called from the main scheduler loop but may also be invoked
722 /// by the scheduling strategy to perform additional code motion.
723 void ScheduleDAGMI::moveInstruction(
724  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
725  // Advance RegionBegin if the first instruction moves down.
726  if (&*RegionBegin == MI)
727  ++RegionBegin;
728 
729  // Update the instruction stream.
730  BB->splice(InsertPos, BB, MI);
731 
732  // Update LiveIntervals
733  if (LIS)
734  LIS->handleMove(*MI, /*UpdateFlags=*/true);
735 
736  // Recede RegionBegin if an instruction moves above the first.
737  if (RegionBegin == InsertPos)
738  RegionBegin = MI;
739 }
740 
741 bool ScheduleDAGMI::checkSchedLimit() {
742 #ifndef NDEBUG
743  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
744  CurrentTop = CurrentBottom;
745  return false;
746  }
747  ++NumInstrsScheduled;
748 #endif
749  return true;
750 }
751 
752 /// Per-region scheduling driver, called back from
753 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
754 /// does not consider liveness or register pressure. It is useful for PostRA
755 /// scheduling and potentially other custom schedulers.
756 void ScheduleDAGMI::schedule() {
757  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
758  LLVM_DEBUG(SchedImpl->dumpPolicy());
759 
760  // Build the DAG.
761  buildSchedGraph(AA);
762 
763  Topo.InitDAGTopologicalSorting();
764 
765  postprocessDAG();
766 
767  SmallVector<SUnit*, 8> TopRoots, BotRoots;
768  findRootsAndBiasEdges(TopRoots, BotRoots);
769 
770  LLVM_DEBUG(dump());
771  if (PrintDAGs) dump();
772  if (ViewMISchedDAGs) viewGraph();
773 
774  // Initialize the strategy before modifying the DAG.
775  // This may initialize a DFSResult to be used for queue priority.
776  SchedImpl->initialize(this);
777 
778  // Initialize ready queues now that the DAG and priority data are finalized.
779  initQueues(TopRoots, BotRoots);
780 
781  bool IsTopNode = false;
782  while (true) {
783  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
784  SUnit *SU = SchedImpl->pickNode(IsTopNode);
785  if (!SU) break;
786 
787  assert(!SU->isScheduled && "Node already scheduled");
788  if (!checkSchedLimit())
789  break;
790 
791  MachineInstr *MI = SU->getInstr();
792  if (IsTopNode) {
793  assert(SU->isTopReady() && "node still has unscheduled dependencies");
794  if (&*CurrentTop == MI)
795  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
796  else
797  moveInstruction(MI, CurrentTop);
798  } else {
799  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
800  MachineBasicBlock::iterator priorII =
801  priorNonDebug(CurrentBottom, CurrentTop);
802  if (&*priorII == MI)
803  CurrentBottom = priorII;
804  else {
805  if (&*CurrentTop == MI)
806  CurrentTop = nextIfDebug(++CurrentTop, priorII);
807  moveInstruction(MI, CurrentBottom);
808  CurrentBottom = MI;
809  }
810  }
811  // Notify the scheduling strategy before updating the DAG.
812  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
813  // runs, it can then use the accurate ReadyCycle time to determine whether
814  // newly released nodes can move to the readyQ.
815  SchedImpl->schedNode(SU, IsTopNode);
816 
817  updateQueues(SU, IsTopNode);
818  }
819  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
820 
821  placeDebugValues();
822 
823  LLVM_DEBUG({
824  dbgs() << "*** Final schedule for "
825  << printMBBReference(*begin()->getParent()) << " ***\n";
826  dumpSchedule();
827  dbgs() << '\n';
828  });
829 }
830 
831 /// Apply each ScheduleDAGMutation step in order.
832 void ScheduleDAGMI::postprocessDAG() {
833  for (auto &m : Mutations)
834  m->apply(this);
835 }
836 
837 void ScheduleDAGMI::
838 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
839  SmallVectorImpl<SUnit*> &BotRoots) {
840  for (SUnit &SU : SUnits) {
841  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
842 
843  // Order predecessors so DFSResult follows the critical path.
844  SU.biasCriticalPath();
845 
846  // A SUnit is ready to top schedule if it has no predecessors.
847  if (!SU.NumPredsLeft)
848  TopRoots.push_back(&SU);
849  // A SUnit is ready to bottom schedule if it has no successors.
850  if (!SU.NumSuccsLeft)
851  BotRoots.push_back(&SU);
852  }
853  ExitSU.biasCriticalPath();
854 }
855 
856 /// Identify DAG roots and setup scheduler queues.
857 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
858  ArrayRef<SUnit*> BotRoots) {
859  NextClusterSucc = nullptr;
860  NextClusterPred = nullptr;
861 
862  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
863  //
864  // Nodes with unreleased weak edges can still be roots.
865  // Release top roots in forward order.
866  for (SUnit *SU : TopRoots)
867  SchedImpl->releaseTopNode(SU);
868 
869  // Release bottom roots in reverse order so the higher priority nodes appear
870  // first. This is more natural and slightly more efficient.
871  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
872  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
873  SchedImpl->releaseBottomNode(*I);
874  }
875 
876  releaseSuccessors(&EntrySU);
877  releasePredecessors(&ExitSU);
878 
879  SchedImpl->registerRoots();
880 
881  // Advance past initial DebugValues.
882  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
883  CurrentBottom = RegionEnd;
884 }
885 
886 /// Update scheduler queues after scheduling an instruction.
887 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
888  // Release dependent instructions for scheduling.
889  if (IsTopNode)
890  releaseSuccessors(SU);
891  else
892  releasePredecessors(SU);
893 
894  SU->isScheduled = true;
895 }
896 
897 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
898 void ScheduleDAGMI::placeDebugValues() {
899  // If first instruction was a DBG_VALUE then put it back.
900  if (FirstDbgValue) {
901  BB->splice(RegionBegin, BB, FirstDbgValue);
902  RegionBegin = FirstDbgValue;
903  }
904 
905  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
906  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
907  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
908  MachineInstr *DbgValue = P.first;
909  MachineBasicBlock::iterator OrigPrevMI = P.second;
910  if (&*RegionBegin == DbgValue)
911  ++RegionBegin;
912  BB->splice(++OrigPrevMI, BB, DbgValue);
913  if (OrigPrevMI == std::prev(RegionEnd))
914  RegionEnd = DbgValue;
915  }
916  DbgValues.clear();
917  FirstDbgValue = nullptr;
918 }
919 
920 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
921 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
922  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
923  if (SUnit *SU = getSUnit(&(*MI)))
924  dumpNode(*SU);
925  else
926  dbgs() << "Missing SUnit\n";
927  }
928 }
929 #endif
930 
931 //===----------------------------------------------------------------------===//
932 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
933 // preservation.
934 //===----------------------------------------------------------------------===//
935 
936 ScheduleDAGMILive::~ScheduleDAGMILive() {
937  delete DFSResult;
938 }
939 
940 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
941  const MachineInstr &MI = *SU.getInstr();
942  for (const MachineOperand &MO : MI.operands()) {
943  if (!MO.isReg())
944  continue;
945  if (!MO.readsReg())
946  continue;
947  if (TrackLaneMasks && !MO.isUse())
948  continue;
949 
950  unsigned Reg = MO.getReg();
951  if (!TargetRegisterInfo::isVirtualRegister(Reg))
952  continue;
953 
954  // Ignore re-defs.
955  if (TrackLaneMasks) {
956  bool FoundDef = false;
957  for (const MachineOperand &MO2 : MI.operands()) {
958  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
959  FoundDef = true;
960  break;
961  }
962  }
963  if (FoundDef)
964  continue;
965  }
966 
967  // Record this local VReg use.
968  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
969  for (; UI != VRegUses.end(); ++UI) {
970  if (UI->SU == &SU)
971  break;
972  }
973  if (UI == VRegUses.end())
974  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
975  }
976 }
977 
978 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
979 /// crossing a scheduling boundary. [begin, end) includes all instructions in
980 /// the region, including the boundary itself and single-instruction regions
981 /// that don't get scheduled.
982 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
983  MachineBasicBlock::iterator begin,
984  MachineBasicBlock::iterator end,
985  unsigned regioninstrs)
986 {
987  // ScheduleDAGMI initializes SchedImpl's per-region policy.
988  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
989 
990  // For convenience remember the end of the liveness region.
991  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
992 
993  SUPressureDiffs.clear();
994 
995  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
996  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
997 
998  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
999  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1000 }
1001 
1002 // Setup the register pressure trackers for the top scheduled and bottom
1003 // scheduled regions.
1004 void ScheduleDAGMILive::initRegPressure() {
1005  VRegUses.clear();
1006  VRegUses.setUniverse(MRI.getNumVirtRegs());
1007  for (SUnit &SU : SUnits)
1008  collectVRegUses(SU);
1009 
1010  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1011  ShouldTrackLaneMasks, false);
1012  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1013  ShouldTrackLaneMasks, false);
1014 
1015  // Close the RPTracker to finalize live ins.
1016  RPTracker.closeRegion();
1017 
1018  LLVM_DEBUG(RPTracker.dump());
1019 
1020  // Initialize the live ins and live outs.
1021  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1022  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1023 
1024  // Close one end of the tracker so we can call
1025  // getMaxUpward/DownwardPressureDelta before advancing across any
1026  // instructions. This converts currently live regs into live ins/outs.
1027  TopRPTracker.closeTop();
1028  BotRPTracker.closeBottom();
1029 
1030  BotRPTracker.initLiveThru(RPTracker);
1031  if (!BotRPTracker.getLiveThru().empty()) {
1032  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1033  LLVM_DEBUG(dbgs() << "Live Thru: ";
1034  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1035  };
1036 
1037  // For each live out vreg reduce the pressure change associated with other
1038  // uses of the same vreg below the live-out reaching def.
1039  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1040 
1041  // Account for liveness generated by the region boundary.
1042  if (LiveRegionEnd != RegionEnd) {
1043  SmallVector<RegisterMaskPair, 8> LiveUses;
1044  BotRPTracker.recede(&LiveUses);
1045  updatePressureDiffs(LiveUses);
1046  }
1047 
1048  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1049  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1050  dbgs() << "Bottom Pressure:\n";
1051  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1052 
1053  assert((BotRPTracker.getPos() == RegionEnd ||
1054  (RegionEnd->isDebugInstr() &&
1055  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1056  "Can't find the region bottom");
1057 
1058  // Cache the list of excess pressure sets in this region. This will also track
1059  // the max pressure in the scheduled code for these sets.
1060  RegionCriticalPSets.clear();
1061  const std::vector<unsigned> &RegionPressure =
1062  RPTracker.getPressure().MaxSetPressure;
1063  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1064  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1065  if (RegionPressure[i] > Limit) {
1066  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1067  << " Actual " << RegionPressure[i] << "\n");
1068  RegionCriticalPSets.push_back(PressureChange(i));
1069  }
1070  }
1071  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1072  for (const PressureChange &RCPS
1073  : RegionCriticalPSets) dbgs()
1074  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1075  dbgs() << "\n");
1076 }
1077 
1078 void ScheduleDAGMILive::
1079 updateScheduledPressure(const SUnit *SU,
1080  const std::vector<unsigned> &NewMaxPressure) {
1081  const PressureDiff &PDiff = getPressureDiff(SU);
1082  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1083  for (const PressureChange &PC : PDiff) {
1084  if (!PC.isValid())
1085  break;
1086  unsigned ID = PC.getPSet();
1087  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1088  ++CritIdx;
1089  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1090  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1091  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1092  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1093  }
1094  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1095  if (NewMaxPressure[ID] >= Limit - 2) {
1096  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1097  << NewMaxPressure[ID]
1098  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1099  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1100  << " livethru)\n");
1101  }
1102  }
1103 }
1104 
1105 /// Update the PressureDiff array for liveness after scheduling this
1106 /// instruction.
1107 void ScheduleDAGMILive::updatePressureDiffs(
1108  ArrayRef<RegisterMaskPair> LiveUses) {
1109  for (const RegisterMaskPair &P : LiveUses) {
1110  unsigned Reg = P.RegUnit;
1111  /// FIXME: Currently assuming single-use physregs.
1112  if (!TRI->isVirtualRegister(Reg))
1113  continue;
1114 
1115  if (ShouldTrackLaneMasks) {
1116  // If the register has just become live then other uses won't change
1117  // this fact anymore => decrement pressure.
1118  // If the register has just become dead then other uses make it come
1119  // back to life => increment pressure.
1120  bool Decrement = P.LaneMask.any();
1121 
1122  for (const VReg2SUnit &V2SU
1123  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1124  SUnit &SU = *V2SU.SU;
1125  if (SU.isScheduled || &SU == &ExitSU)
1126  continue;
1127 
1128  PressureDiff &PDiff = getPressureDiff(&SU);
1129  PDiff.addPressureChange(Reg, Decrement, &MRI);
1130  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1131  << printReg(Reg, TRI) << ':'
1132  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1133  dbgs() << " to "; PDiff.dump(*TRI););
1134  }
1135  } else {
1136  assert(P.LaneMask.any());
1137  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1138  // This may be called before CurrentBottom has been initialized. However,
1139  // BotRPTracker must have a valid position. We want the value live into the
1140  // instruction or live out of the block, so ask for the previous
1141  // instruction's live-out.
1142  const LiveInterval &LI = LIS->getInterval(Reg);
1143  VNInfo *VNI;
1144  MachineBasicBlock::const_iterator I =
1145  nextIfDebug(BotRPTracker.getPos(), BB->end());
1146  if (I == BB->end())
1147  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1148  else {
1149  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1150  VNI = LRQ.valueIn();
1151  }
1152  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1153  assert(VNI && "No live value at use.");
1154  for (const VReg2SUnit &V2SU
1155  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1156  SUnit *SU = V2SU.SU;
1157  // If this use comes before the reaching def, it cannot be a last use,
1158  // so decrease its pressure change.
1159  if (!SU->isScheduled && SU != &ExitSU) {
1160  LiveQueryResult LRQ =
1161  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1162  if (LRQ.valueIn() == VNI) {
1163  PressureDiff &PDiff = getPressureDiff(SU);
1164  PDiff.addPressureChange(Reg, true, &MRI);
1165  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1166  << *SU->getInstr();
1167  dbgs() << " to "; PDiff.dump(*TRI););
1168  }
1169  }
1170  }
1171  }
1172  }
1173 }
1174 
1175 void ScheduleDAGMILive::dump() const {
1176 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1177  if (EntrySU.getInstr() != nullptr)
1178  dumpNodeAll(EntrySU);
1179  for (const SUnit &SU : SUnits) {
1180  dumpNodeAll(SU);
1181  if (ShouldTrackPressure) {
1182  dbgs() << " Pressure Diff : ";
1183  getPressureDiff(&SU).dump(*TRI);
1184  }
1185  dbgs() << " Single Issue : ";
1186  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1187  SchedModel.mustEndGroup(SU.getInstr()))
1188  dbgs() << "true;";
1189  else
1190  dbgs() << "false;";
1191  dbgs() << '\n';
1192  }
1193  if (ExitSU.getInstr() != nullptr)
1194  dumpNodeAll(ExitSU);
1195 #endif
1196 }
1197 
1198 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1200 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1201 ///
1202 /// This is a skeletal driver, with all the functionality pushed into helpers,
1203 /// so that it can be easily extended by experimental schedulers. Generally,
1204 /// implementing MachineSchedStrategy should be sufficient to implement a new
1205 /// scheduling algorithm. However, if a scheduler further subclasses
1206 /// ScheduleDAGMILive then it will want to override this virtual method in order
1207 /// to update any specialized state.
1208 void ScheduleDAGMILive::schedule() {
1209  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1210  LLVM_DEBUG(SchedImpl->dumpPolicy());
1211  buildDAGWithRegPressure();
1212 
1213  Topo.InitDAGTopologicalSorting();
1214 
1215  postprocessDAG();
1216 
1217  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1218  findRootsAndBiasEdges(TopRoots, BotRoots);
1219 
1220  // Initialize the strategy before modifying the DAG.
1221  // This may initialize a DFSResult to be used for queue priority.
1222  SchedImpl->initialize(this);
1223 
1224  LLVM_DEBUG(dump());
1225  if (PrintDAGs) dump();
1226  if (ViewMISchedDAGs) viewGraph();
1227 
1228  // Initialize ready queues now that the DAG and priority data are finalized.
1229  initQueues(TopRoots, BotRoots);
1230 
1231  bool IsTopNode = false;
1232  while (true) {
1233  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1234  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1235  if (!SU) break;
1236 
1237  assert(!SU->isScheduled && "Node already scheduled");
1238  if (!checkSchedLimit())
1239  break;
1240 
1241  scheduleMI(SU, IsTopNode);
1242 
1243  if (DFSResult) {
1244  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1245  if (!ScheduledTrees.test(SubtreeID)) {
1246  ScheduledTrees.set(SubtreeID);
1247  DFSResult->scheduleTree(SubtreeID);
1248  SchedImpl->scheduleTree(SubtreeID);
1249  }
1250  }
1251 
1252  // Notify the scheduling strategy after updating the DAG.
1253  SchedImpl->schedNode(SU, IsTopNode);
1254 
1255  updateQueues(SU, IsTopNode);
1256  }
1257  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1258 
1259  placeDebugValues();
1260 
1261  LLVM_DEBUG({
1262  dbgs() << "*** Final schedule for "
1263  << printMBBReference(*begin()->getParent()) << " ***\n";
1264  dumpSchedule();
1265  dbgs() << '\n';
1266  });
1267 }
1268 
1269 /// Build the DAG and setup three register pressure trackers.
1270 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1271  if (!ShouldTrackPressure) {
1272  RPTracker.reset();
1273  RegionCriticalPSets.clear();
1274  buildSchedGraph(AA);
1275  return;
1276  }
1277 
1278  // Initialize the register pressure tracker used by buildSchedGraph.
1279  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1280  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1281 
1282  // Account for liveness generated by the region boundary.
1283  if (LiveRegionEnd != RegionEnd)
1284  RPTracker.recede();
1285 
1286  // Build the DAG, and compute current register pressure.
1287  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1288 
1289  // Initialize top/bottom trackers after computing region pressure.
1290  initRegPressure();
1291 }
1292 
1293 void ScheduleDAGMILive::computeDFSResult() {
1294  if (!DFSResult)
1295  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1296  DFSResult->clear();
1297  ScheduledTrees.clear();
1298  DFSResult->resize(SUnits.size());
1299  DFSResult->compute(SUnits);
1300  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1301 }
1302 
1303 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1304 /// only provides the critical path for single block loops. To handle loops that
1305 /// span blocks, we could use the vreg path latencies provided by
1306 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1307 /// available for use in the scheduler.
1308 ///
1309 /// The cyclic path estimation identifies a def-use pair that crosses the back
1310 /// edge and considers the depth and height of the nodes. For example, consider
1311 /// the following instruction sequence where each instruction has unit latency
1312 /// and defines an eponymous virtual register:
1313 ///
1314 /// a->b(a,c)->c(b)->d(c)->exit
1315 ///
1316 /// The cyclic critical path is two cycles: b->c->b
1317 /// The acyclic critical path is four cycles: a->b->c->d->exit
1318 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1319 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1320 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1321 /// LiveInDepth = depth(b) = len(a->b) = 1
1322 ///
1323 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1324 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1325 /// CyclicCriticalPath = min(2, 2) = 2
1326 ///
1327 /// This could be relevant to PostRA scheduling, but is currently implemented
1328 /// assuming LiveIntervals.
1329 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1330  // This only applies to a single block loop.
1331  if (!BB->isSuccessor(BB))
1332  return 0;
1333 
1334  unsigned MaxCyclicLatency = 0;
1335  // Visit each live out vreg def to find def/use pairs that cross iterations.
1336  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1337  unsigned Reg = P.RegUnit;
1338  if (!TRI->isVirtualRegister(Reg))
1339  continue;
1340  const LiveInterval &LI = LIS->getInterval(Reg);
1341  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1342  if (!DefVNI)
1343  continue;
1344 
1345  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1346  const SUnit *DefSU = getSUnit(DefMI);
1347  if (!DefSU)
1348  continue;
1349 
1350  unsigned LiveOutHeight = DefSU->getHeight();
1351  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1352  // Visit all local users of the vreg def.
1353  for (const VReg2SUnit &V2SU
1354  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1355  SUnit *SU = V2SU.SU;
1356  if (SU == &ExitSU)
1357  continue;
1358 
1359  // Only consider uses of the phi.
1360  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1361  if (!LRQ.valueIn()->isPHIDef())
1362  continue;
1363 
1364  // Assume that a path spanning two iterations is a cycle, which could
1365  // overestimate in strange cases. This allows cyclic latency to be
1366  // estimated as the minimum slack of the vreg's depth or height.
1367  unsigned CyclicLatency = 0;
1368  if (LiveOutDepth > SU->getDepth())
1369  CyclicLatency = LiveOutDepth - SU->getDepth();
1370 
1371  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1372  if (LiveInHeight > LiveOutHeight) {
1373  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1374  CyclicLatency = LiveInHeight - LiveOutHeight;
1375  } else
1376  CyclicLatency = 0;
1377 
1378  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1379  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1380  if (CyclicLatency > MaxCyclicLatency)
1381  MaxCyclicLatency = CyclicLatency;
1382  }
1383  }
1384  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1385  return MaxCyclicLatency;
1386 }
1387 
1388 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1389 /// the Top RP tracker in case the region beginning has changed.
1390 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1391  ArrayRef<SUnit*> BotRoots) {
1392  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1393  if (ShouldTrackPressure) {
1394  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1395  TopRPTracker.setPos(CurrentTop);
1396  }
1397 }
1398 
1399 /// Move an instruction and update register pressure.
1400 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1401  // Move the instruction to its new location in the instruction stream.
1402  MachineInstr *MI = SU->getInstr();
1403 
1404  if (IsTopNode) {
1405  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1406  if (&*CurrentTop == MI)
1407  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1408  else {
1409  moveInstruction(MI, CurrentTop);
1410  TopRPTracker.setPos(MI);
1411  }
1412 
1413  if (ShouldTrackPressure) {
1414  // Update top scheduled pressure.
1415  RegisterOperands RegOpers;
1416  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1417  if (ShouldTrackLaneMasks) {
1418  // Adjust liveness and add missing dead+read-undef flags.
1419  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1420  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1421  } else {
1422  // Adjust for missing dead-def flags.
1423  RegOpers.detectDeadDefs(*MI, *LIS);
1424  }
1425 
1426  TopRPTracker.advance(RegOpers);
1427  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1428  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1429  TopRPTracker.getRegSetPressureAtPos(), TRI););
1430 
1431  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1432  }
1433  } else {
1434  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1435  MachineBasicBlock::iterator priorII =
1436  priorNonDebug(CurrentBottom, CurrentTop);
1437  if (&*priorII == MI)
1438  CurrentBottom = priorII;
1439  else {
1440  if (&*CurrentTop == MI) {
1441  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1442  TopRPTracker.setPos(CurrentTop);
1443  }
1444  moveInstruction(MI, CurrentBottom);
1445  CurrentBottom = MI;
1446  BotRPTracker.setPos(CurrentBottom);
1447  }
1448  if (ShouldTrackPressure) {
1449  RegisterOperands RegOpers;
1450  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1451  if (ShouldTrackLaneMasks) {
1452  // Adjust liveness and add missing dead+read-undef flags.
1453  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1454  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1455  } else {
1456  // Adjust for missing dead-def flags.
1457  RegOpers.detectDeadDefs(*MI, *LIS);
1458  }
1459 
1460  if (BotRPTracker.getPos() != CurrentBottom)
1461  BotRPTracker.recedeSkipDebugValues();
1462  SmallVector<RegisterMaskPair, 8> LiveUses;
1463  BotRPTracker.recede(RegOpers, &LiveUses);
1464  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1465  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1466  BotRPTracker.getRegSetPressureAtPos(), TRI););
1467 
1468  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1469  updatePressureDiffs(LiveUses);
1470  }
1471  }
1472 }
1473 
1474 //===----------------------------------------------------------------------===//
1475 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1476 //===----------------------------------------------------------------------===//
1477 
1478 namespace {
1479 
1480 /// Post-process the DAG to create cluster edges between neighboring
1481 /// loads or between neighboring stores.
1482 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1483  struct MemOpInfo {
1484  SUnit *SU;
1485  unsigned BaseReg;
1486  int64_t Offset;
1487 
1488  MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1489  : SU(su), BaseReg(reg), Offset(ofs) {}
1490 
1491  bool operator<(const MemOpInfo&RHS) const {
1492  return std::tie(BaseReg, Offset, SU->NodeNum) <
1493  std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1494  }
1495  };
1496 
1497  const TargetInstrInfo *TII;
1498  const TargetRegisterInfo *TRI;
1499  bool IsLoad;
1500 
1501 public:
1502  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1503  const TargetRegisterInfo *tri, bool IsLoad)
1504  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1505 
1506  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1507 
1508 protected:
1509  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1510 };
1511 
1512 class StoreClusterMutation : public BaseMemOpClusterMutation {
1513 public:
1514  StoreClusterMutation(const TargetInstrInfo *tii,
1515  const TargetRegisterInfo *tri)
1516  : BaseMemOpClusterMutation(tii, tri, false) {}
1517 };
1518 
1519 class LoadClusterMutation : public BaseMemOpClusterMutation {
1520 public:
1521  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1522  : BaseMemOpClusterMutation(tii, tri, true) {}
1523 };
1524 
1525 } // end anonymous namespace
1526 
1527 namespace llvm {
1528 
1529 std::unique_ptr<ScheduleDAGMutation>
1530 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1531  const TargetRegisterInfo *TRI) {
1532  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1533  : nullptr;
1534 }
1535 
1536 std::unique_ptr<ScheduleDAGMutation>
1537 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1538  const TargetRegisterInfo *TRI) {
1539  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1540  : nullptr;
1541 }
1542 
1543 } // end namespace llvm
1544 
1545 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1546  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1547  SmallVector<MemOpInfo, 32> MemOpRecords;
1548  for (SUnit *SU : MemOps) {
1549  unsigned BaseReg;
1550  int64_t Offset;
1551  if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1552  MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1553  }
1554  if (MemOpRecords.size() < 2)
1555  return;
1556 
1557  llvm::sort(MemOpRecords);
1558  unsigned ClusterLength = 1;
1559  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1560  SUnit *SUa = MemOpRecords[Idx].SU;
1561  SUnit *SUb = MemOpRecords[Idx+1].SU;
1562  if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1563  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1564  ClusterLength) &&
1565  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1566  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1567  << SUb->NodeNum << ")\n");
1568  // Copy successor edges from SUa to SUb. Interleaving computation
1569  // dependent on SUa can prevent load combining due to register reuse.
1570  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1571  // loads should have effectively the same inputs.
1572  for (const SDep &Succ : SUa->Succs) {
1573  if (Succ.getSUnit() == SUb)
1574  continue;
1575  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1576  << ")\n");
1577  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1578  }
1579  ++ClusterLength;
1580  } else
1581  ClusterLength = 1;
1582  }
1583 }
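// Illustrative example (not from the source, AArch64 syntax used only for
// concreteness): given SU(5): %1 = LDRWui %x0, 0 and SU(6): %2 = LDRWui %x0, 1,
// both keyed on base register %x0, shouldClusterMemOps() lets the target accept
// the pair, a weak Cluster edge SU(5)->SU(6) is added, and SU(5)'s remaining
// successors receive artificial edges from SU(6) so unrelated computation is
// not interleaved between the two loads.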
1584 
1585 /// Callback from DAG postProcessing to create cluster edges for loads.
1586 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1587  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1588 
1589  // Map DAG NodeNum to store chain ID.
1590  DenseMap<unsigned, unsigned> StoreChainIDs;
1591  // Map each store chain to a set of dependent MemOps.
1592  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1593  for (SUnit &SU : DAG->SUnits) {
1594  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1595  (!IsLoad && !SU.getInstr()->mayStore()))
1596  continue;
1597 
1598  unsigned ChainPredID = DAG->SUnits.size();
1599  for (const SDep &Pred : SU.Preds) {
1600  if (Pred.isCtrl()) {
1601  ChainPredID = Pred.getSUnit()->NodeNum;
1602  break;
1603  }
1604  }
1605  // Check if this chain-like pred has been seen
1606  // before. ChainPredID==MaxNodeID at the top of the schedule.
1607  unsigned NumChains = StoreChainDependents.size();
1608  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1609  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1610  if (Result.second)
1611  StoreChainDependents.resize(NumChains + 1);
1612  StoreChainDependents[Result.first->second].push_back(&SU);
1613  }
1614 
1615  // Iterate over the store chains.
1616  for (auto &SCD : StoreChainDependents)
1617  clusterNeighboringMemOps(SCD, DAG);
1618 }
1619 
1620 //===----------------------------------------------------------------------===//
1621 // CopyConstrain - DAG post-processing to encourage copy elimination.
1622 //===----------------------------------------------------------------------===//
1623 
1624 namespace {
1625 
1626 /// Post-process the DAG to create weak edges from all uses of a copy to
1627 /// the one use that defines the copy's source vreg, most likely an induction
1628 /// variable increment.
1629 class CopyConstrain : public ScheduleDAGMutation {
1630  // Transient state.
1631  SlotIndex RegionBeginIdx;
1632 
1633  // RegionEndIdx is the slot index of the last non-debug instruction in the
1634  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1635  SlotIndex RegionEndIdx;
1636 
1637 public:
1638  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1639 
1640  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1641 
1642 protected:
1643  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1644 };
1645 
1646 } // end anonymous namespace
1647 
1648 namespace llvm {
1649 
1650 std::unique_ptr<ScheduleDAGMutation>
1651 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1652  const TargetRegisterInfo *TRI) {
1653  return llvm::make_unique<CopyConstrain>(TII, TRI);
1654 }
1655 
1656 } // end namespace llvm
1657 
1658 /// constrainLocalCopy handles two possibilities:
1659 /// 1) Local src:
1660 /// I0: = dst
1661 /// I1: src = ...
1662 /// I2: = dst
1663 /// I3: dst = src (copy)
1664 /// (create pred->succ edges I0->I1, I2->I1)
1665 ///
1666 /// 2) Local copy:
1667 /// I0: dst = src (copy)
1668 /// I1: = dst
1669 /// I2: src = ...
1670 /// I3: = dst
1671 /// (create pred->succ edges I1->I2, I3->I2)
1672 ///
1673 /// Although the MachineScheduler is currently constrained to single blocks,
1674 /// this algorithm should handle extended blocks. An EBB is a set of
1675 /// contiguously numbered blocks such that the previous block in the EBB is
1676 /// always the single predecessor.
1677 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1678  LiveIntervals *LIS = DAG->getLIS();
1679  MachineInstr *Copy = CopySU->getInstr();
1680 
1681  // Check for pure vreg copies.
1682  const MachineOperand &SrcOp = Copy->getOperand(1);
1683  unsigned SrcReg = SrcOp.getReg();
1684  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1685  return;
1686 
1687  const MachineOperand &DstOp = Copy->getOperand(0);
1688  unsigned DstReg = DstOp.getReg();
1689  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1690  return;
1691 
1692  // Check if either the dest or source is local. If it's live across a back
1693  // edge, it's not local. Note that if both vregs are live across the back
1694  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1695  // If both the copy's source and dest are local live intervals, then we
1696  // should treat the dest as the global for the purpose of adding
1697  // constraints. This adds edges from source's other uses to the copy.
1698  unsigned LocalReg = SrcReg;
1699  unsigned GlobalReg = DstReg;
1700  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1701  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1702  LocalReg = DstReg;
1703  GlobalReg = SrcReg;
1704  LocalLI = &LIS->getInterval(LocalReg);
1705  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1706  return;
1707  }
1708  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1709 
1710  // Find the global segment after the start of the local LI.
1711  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1712  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1713  // local live range. We could create edges from other global uses to the local
1714  // start, but the coalescer should have already eliminated these cases, so
1715  // don't bother dealing with it.
1716  if (GlobalSegment == GlobalLI->end())
1717  return;
1718 
1719  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1720  // returned the next global segment. But if GlobalSegment overlaps with
1721  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1722  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1723  if (GlobalSegment->contains(LocalLI->beginIndex()))
1724  ++GlobalSegment;
1725 
1726  if (GlobalSegment == GlobalLI->end())
1727  return;
1728 
1729  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1730  if (GlobalSegment != GlobalLI->begin()) {
1731  // Two address defs have no hole.
1732  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1733  GlobalSegment->start)) {
1734  return;
1735  }
1736  // If the prior global segment may be defined by the same two-address
1737  // instruction that also defines LocalLI, then can't make a hole here.
1738  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1739  LocalLI->beginIndex())) {
1740  return;
1741  }
1742  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1743  // it would be a disconnected component in the live range.
1744  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1745  "Disconnected LRG within the scheduling region.");
1746  }
1747  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1748  if (!GlobalDef)
1749  return;
1750 
1751  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1752  if (!GlobalSU)
1753  return;
1754 
1755  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1756  // constraining the uses of the last local def to precede GlobalDef.
1757  SmallVector<SUnit*,8> LocalUses;
1758  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1759  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1760  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1761  for (const SDep &Succ : LastLocalSU->Succs) {
1762  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1763  continue;
1764  if (Succ.getSUnit() == GlobalSU)
1765  continue;
1766  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1767  return;
1768  LocalUses.push_back(Succ.getSUnit());
1769  }
1770  // Open the top of the GlobalLI hole by constraining any earlier global uses
1771  // to precede the start of LocalLI.
1772  SmallVector<SUnit*,8> GlobalUses;
1773  MachineInstr *FirstLocalDef =
1774  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1775  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1776  for (const SDep &Pred : GlobalSU->Preds) {
1777  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1778  continue;
1779  if (Pred.getSUnit() == FirstLocalSU)
1780  continue;
1781  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1782  return;
1783  GlobalUses.push_back(Pred.getSUnit());
1784  }
1785  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1786  // Add the weak edges.
1787  for (SmallVectorImpl<SUnit*>::const_iterator
1788  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1789  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1790  << GlobalSU->NodeNum << ")\n");
1791  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1792  }
1793  for (SmallVectorImpl<SUnit*>::const_iterator
1794  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1795  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1796  << FirstLocalSU->NodeNum << ")\n");
1797  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1798  }
1799 }
1800 
1801 /// Callback from DAG postProcessing to create weak edges to encourage
1802 /// copy elimination.
1803 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1804  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1805  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1806 
1807  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1808  if (FirstPos == DAG->end())
1809  return;
1810  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1811  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1812  *priorNonDebug(DAG->end(), DAG->begin()));
1813 
1814  for (SUnit &SU : DAG->SUnits) {
1815  if (!SU.getInstr()->isCopy())
1816  continue;
1817 
1818  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1819  }
1820 }
1821 
1822 //===----------------------------------------------------------------------===//
1823 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1824 // and possibly other custom schedulers.
1825 //===----------------------------------------------------------------------===//
1826 
1827 static const unsigned InvalidCycle = ~0U;
1828 
1829 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1830 
1831 /// Given a Count of resource usage and a Latency value, return true if a
1832 /// SchedBoundary becomes resource limited.
1833 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1834  unsigned Latency) {
1835  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1836 }
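// Editor's note: a worked example of the check above, with illustrative
// numbers not taken from the source. With LFactor = 2, Latency = 3 and a
// scaled Count = 9, the expression evaluates 9 - (3 * 2) = 3 > 2, so the zone
// counts as resource limited; with Count = 7 it evaluates 7 - 6 = 1 <= 2 and
// the zone remains latency limited.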
1837 
1838 void SchedBoundary::reset() {
1839  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1840  // Destroying and reconstructing it is very expensive though. So keep
1841  // invalid, placeholder HazardRecs.
1842  if (HazardRec && HazardRec->isEnabled()) {
1843  delete HazardRec;
1844  HazardRec = nullptr;
1845  }
1846  Available.clear();
1847  Pending.clear();
1848  CheckPending = false;
1849  CurrCycle = 0;
1850  CurrMOps = 0;
1851  MinReadyCycle = std::numeric_limits<unsigned>::max();
1852  ExpectedLatency = 0;
1853  DependentLatency = 0;
1854  RetiredMOps = 0;
1855  MaxExecutedResCount = 0;
1856  ZoneCritResIdx = 0;
1857  IsResourceLimited = false;
1858  ReservedCycles.clear();
1859 #ifndef NDEBUG
1860  // Track the maximum number of stall cycles that could arise either from the
1861  // latency of a DAG edge or the number of cycles that a processor resource is
1862  // reserved (SchedBoundary::ReservedCycles).
1863  MaxObservedStall = 0;
1864 #endif
1865  // Reserve a zero-count for invalid CritResIdx.
1866  ExecutedResCounts.resize(1);
1867  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1868 }
1869 
1870 void SchedRemainder::
1871 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1872  reset();
1873  if (!SchedModel->hasInstrSchedModel())
1874  return;
1875  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1876  for (SUnit &SU : DAG->SUnits) {
1877  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1878  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1879  * SchedModel->getMicroOpFactor();
1880  for (TargetSchedModel::ProcResIter
1881  PI = SchedModel->getWriteProcResBegin(SC),
1882  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1883  unsigned PIdx = PI->ProcResourceIdx;
1884  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1885  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1886  }
1887  }
1888 }
1889 
1890 void SchedBoundary::
1891 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1892  reset();
1893  DAG = dag;
1894  SchedModel = smodel;
1895  Rem = rem;
1896  if (SchedModel->hasInstrSchedModel()) {
1897  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1898  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1899  }
1900 }
1901 
1902 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1903 /// these "soft stalls" differently than the hard stall cycles based on CPU
1904 /// resources and computed by checkHazard(). A fully in-order model
1905 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1906 /// available for scheduling until they are ready. However, a weaker in-order
1907 /// model may use this for heuristics. For example, if a processor has in-order
1908 /// behavior when reading certain resources, this may come into play.
1909 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1910  if (!SU->isUnbuffered)
1911  return 0;
1912 
1913  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1914  if (ReadyCycle > CurrCycle)
1915  return ReadyCycle - CurrCycle;
1916  return 0;
1917 }
1918 
1919 /// Compute the next cycle at which the given processor resource can be
1920 /// scheduled.
1921 unsigned SchedBoundary::
1922 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1923  unsigned NextUnreserved = ReservedCycles[PIdx];
1924  // If this resource has never been used, always return cycle zero.
1925  if (NextUnreserved == InvalidCycle)
1926  return 0;
1927  // For bottom-up scheduling add the cycles needed for the current operation.
1928  if (!isTop())
1929  NextUnreserved += Cycles;
1930  return NextUnreserved;
1931 }
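// Editor's note: illustrative example, values assumed. If ReservedCycles[PIdx]
// is 10 and the current operation needs Cycles = 4, a top-down zone can next
// use the resource at cycle 10, while a bottom-up zone returns 14, because the
// operation being scheduled must occupy the resource before the previously
// recorded reservation.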
1932 
1933 /// Does this SU have a hazard within the current instruction group.
1934 ///
1935 /// The scheduler supports two modes of hazard recognition. The first is the
1936 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1937 /// supports highly complicated in-order reservation tables
1938 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1939 ///
1940 /// The second is a streamlined mechanism that checks for hazards based on
1941 /// simple counters that the scheduler itself maintains. It explicitly checks
1942 /// for instruction dispatch limitations, including the number of micro-ops that
1943 /// can dispatch per cycle.
1944 ///
1945 /// TODO: Also check whether the SU must start a new group.
1946 bool SchedBoundary::checkHazard(SUnit *SU) {
1947  if (HazardRec->isEnabled()
1948  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1949  return true;
1950  }
1951 
1952  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1953  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1954  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1955  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1956  return true;
1957  }
1958 
1959  if (CurrMOps > 0 &&
1960  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1961  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1962  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1963  << (isTop() ? "begin" : "end") << " group\n");
1964  return true;
1965  }
1966 
1967  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1968  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1969  for (const MCWriteProcResEntry &PE :
1970  make_range(SchedModel->getWriteProcResBegin(SC),
1971  SchedModel->getWriteProcResEnd(SC))) {
1972  unsigned ResIdx = PE.ProcResourceIdx;
1973  unsigned Cycles = PE.Cycles;
1974  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1975  if (NRCycle > CurrCycle) {
1976 #ifndef NDEBUG
1977  MaxObservedStall = std::max(Cycles, MaxObservedStall);
1978 #endif
1979  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1980  << SchedModel->getResourceName(ResIdx) << "="
1981  << NRCycle << "c\n");
1982  return true;
1983  }
1984  }
1985  }
1986  return false;
1987 }
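// Editor's note: illustrative example of the micro-op check above, numbers
// assumed. On a model with IssueWidth = 4, if CurrMOps is 3 and the candidate
// SU expands to 2 micro-ops, 3 + 2 > 4 reports a hazard, so releaseNode() and
// pickOnlyChoice() keep the node in Pending until bumpCycle() resets CurrMOps.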
1988 
1989 // Find the unscheduled node in ReadySUs with the highest latency.
1990 unsigned SchedBoundary::
1991 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1992  SUnit *LateSU = nullptr;
1993  unsigned RemLatency = 0;
1994  for (SUnit *SU : ReadySUs) {
1995  unsigned L = getUnscheduledLatency(SU);
1996  if (L > RemLatency) {
1997  RemLatency = L;
1998  LateSU = SU;
1999  }
2000  }
2001  if (LateSU) {
2002  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2003  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2004  }
2005  return RemLatency;
2006 }
2007 
2008 // Count resources in this zone and the remaining unscheduled
2009 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2010 // resource index, or zero if the zone is issue limited.
2011 unsigned SchedBoundary::
2012 getOtherResourceCount(unsigned &OtherCritIdx) {
2013  OtherCritIdx = 0;
2014  if (!SchedModel->hasInstrSchedModel())
2015  return 0;
2016 
2017  unsigned OtherCritCount = Rem->RemIssueCount
2018  + (RetiredMOps * SchedModel->getMicroOpFactor());
2019  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2020  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2021  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2022  PIdx != PEnd; ++PIdx) {
2023  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2024  if (OtherCount > OtherCritCount) {
2025  OtherCritCount = OtherCount;
2026  OtherCritIdx = PIdx;
2027  }
2028  }
2029  if (OtherCritIdx) {
2030  LLVM_DEBUG(
2031  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2032  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2033  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2034  }
2035  return OtherCritCount;
2036 }
2037 
2038 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2039  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2040 
2041 #ifndef NDEBUG
2042  // ReadyCycle has been bumped up to the CurrCycle when this node was
2043  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2044  // scheduling, so may now be greater than ReadyCycle.
2045  if (ReadyCycle > CurrCycle)
2046  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2047 #endif
2048 
2049  if (ReadyCycle < MinReadyCycle)
2050  MinReadyCycle = ReadyCycle;
2051 
2052  // Check for interlocks first. For the purpose of other heuristics, an
2053  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2054  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2055  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2056  Available.size() >= ReadyListLimit)
2057  Pending.push(SU);
2058  else
2059  Available.push(SU);
2060 }
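// Editor's note: for example (values assumed), on a fully in-order model
// (MicroOpBufferSize == 0) an SU with ReadyCycle = 7 while CurrCycle = 5 is
// pushed to Pending rather than Available, so the heuristics do not see it
// until releasePending() moves it across once the cycle catches up.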
2061 
2062 /// Move the boundary of scheduled code by one cycle.
2063 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2064  if (SchedModel->getMicroOpBufferSize() == 0) {
2065  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2066  "MinReadyCycle uninitialized");
2067  if (MinReadyCycle > NextCycle)
2068  NextCycle = MinReadyCycle;
2069  }
2070  // Update the current micro-ops, which will issue in the next cycle.
2071  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2072  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2073 
2074  // Decrement DependentLatency based on the next cycle.
2075  if ((NextCycle - CurrCycle) > DependentLatency)
2076  DependentLatency = 0;
2077  else
2078  DependentLatency -= (NextCycle - CurrCycle);
2079 
2080  if (!HazardRec->isEnabled()) {
2081  // Bypass HazardRec virtual calls.
2082  CurrCycle = NextCycle;
2083  } else {
2084  // Bypass getHazardType calls in case of long latency.
2085  for (; CurrCycle != NextCycle; ++CurrCycle) {
2086  if (isTop())
2087  HazardRec->AdvanceCycle();
2088  else
2089  HazardRec->RecedeCycle();
2090  }
2091  }
2092  CheckPending = true;
2093  IsResourceLimited =
2094  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2095  getScheduledLatency());
2096 
2097  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2098  << '\n');
2099 }
2100 
2101 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2102  ExecutedResCounts[PIdx] += Count;
2103  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2104  MaxExecutedResCount = ExecutedResCounts[PIdx];
2105 }
2106 
2107 /// Add the given processor resource to this scheduled zone.
2108 ///
2109 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2110 /// during which this resource is consumed.
2111 ///
2112 /// \return the next cycle at which the instruction may execute without
2113 /// oversubscribing resources.
2114 unsigned SchedBoundary::
2115 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2116  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2117  unsigned Count = Factor * Cycles;
2118  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2119  << Cycles << "x" << Factor << "u\n");
2120 
2121  // Update Executed resources counts.
2122  incExecutedResources(PIdx, Count);
2123  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2124  Rem->RemainingCounts[PIdx] -= Count;
2125 
2126  // Check if this resource exceeds the current critical resource. If so, it
2127  // becomes the critical resource.
2128  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2129  ZoneCritResIdx = PIdx;
2130  LLVM_DEBUG(dbgs() << " *** Critical resource "
2131  << SchedModel->getResourceName(PIdx) << ": "
2132  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2133  << "c\n");
2134  }
2135  // For reserved resources, record the highest cycle using the resource.
2136  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2137  if (NextAvailable > CurrCycle) {
2138  LLVM_DEBUG(dbgs() << " Resource conflict: "
2139  << SchedModel->getProcResource(PIdx)->Name
2140  << " reserved until @" << NextAvailable << "\n");
2141  }
2142  return NextAvailable;
2143 }
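// Editor's note: worked example with assumed numbers. If resource PIdx has a
// factor of 3 and the write occupies it for Cycles = 2, Count = 6 scaled units
// are charged to ExecutedResCounts and subtracted from the region's
// RemainingCounts; if getResourceCount(PIdx) now exceeds getCriticalCount(),
// PIdx becomes the zone's critical resource.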
2144 
2145 /// Move the boundary of scheduled code by one SUnit.
2146 void SchedBoundary::bumpNode(SUnit *SU) {
2147  // Update the reservation table.
2148  if (HazardRec->isEnabled()) {
2149  if (!isTop() && SU->isCall) {
2150  // Calls are scheduled with their preceding instructions. For bottom-up
2151  // scheduling, clear the pipeline state before emitting.
2152  HazardRec->Reset();
2153  }
2154  HazardRec->EmitInstruction(SU);
2155  }
2156  // checkHazard should prevent scheduling multiple instructions per cycle that
2157  // exceed the issue width.
2158  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2159  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2160  assert(
2161  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2162  "Cannot schedule this instruction's MicroOps in the current cycle.");
2163 
2164  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2165  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2166 
2167  unsigned NextCycle = CurrCycle;
2168  switch (SchedModel->getMicroOpBufferSize()) {
2169  case 0:
2170  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2171  break;
2172  case 1:
2173  if (ReadyCycle > NextCycle) {
2174  NextCycle = ReadyCycle;
2175  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2176  }
2177  break;
2178  default:
2179  // We don't currently model the OOO reorder buffer, so consider all
2180  // scheduled MOps to be "retired". We do loosely model in-order resource
2181  // latency. If this instruction uses an in-order resource, account for any
2182  // likely stall cycles.
2183  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2184  NextCycle = ReadyCycle;
2185  break;
2186  }
2187  RetiredMOps += IncMOps;
2188 
2189  // Update resource counts and critical resource.
2190  if (SchedModel->hasInstrSchedModel()) {
2191  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2192  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2193  Rem->RemIssueCount -= DecRemIssue;
2194  if (ZoneCritResIdx) {
2195  // Scale scheduled micro-ops for comparing with the critical resource.
2196  unsigned ScaledMOps =
2197  RetiredMOps * SchedModel->getMicroOpFactor();
2198 
2199  // If scaled micro-ops are now more than the previous critical resource by
2200  // a full cycle, then micro-ops issue becomes critical.
2201  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2202  >= (int)SchedModel->getLatencyFactor()) {
2203  ZoneCritResIdx = 0;
2204  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2205  << ScaledMOps / SchedModel->getLatencyFactor()
2206  << "c\n");
2207  }
2208  }
2209  for (TargetSchedModel::ProcResIter
2210  PI = SchedModel->getWriteProcResBegin(SC),
2211  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2212  unsigned RCycle =
2213  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2214  if (RCycle > NextCycle)
2215  NextCycle = RCycle;
2216  }
2217  if (SU->hasReservedResource) {
2218  // For reserved resources, record the highest cycle using the resource.
2219  // For top-down scheduling, this is the cycle in which we schedule this
2220  // instruction plus the number of cycles the operation reserves the
2221  // resource. For bottom-up it is simply the instruction's cycle.
2222  for (TargetSchedModel::ProcResIter
2223  PI = SchedModel->getWriteProcResBegin(SC),
2224  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2225  unsigned PIdx = PI->ProcResourceIdx;
2226  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2227  if (isTop()) {
2228  ReservedCycles[PIdx] =
2229  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2230  }
2231  else
2232  ReservedCycles[PIdx] = NextCycle;
2233  }
2234  }
2235  }
2236  }
2237  // Update ExpectedLatency and DependentLatency.
2238  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2239  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2240  if (SU->getDepth() > TopLatency) {
2241  TopLatency = SU->getDepth();
2242  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2243  << SU->NodeNum << ") " << TopLatency << "c\n");
2244  }
2245  if (SU->getHeight() > BotLatency) {
2246  BotLatency = SU->getHeight();
2247  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2248  << SU->NodeNum << ") " << BotLatency << "c\n");
2249  }
2250  // If we stall for any reason, bump the cycle.
2251  if (NextCycle > CurrCycle)
2252  bumpCycle(NextCycle);
2253  else
2254  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2255  // resource limited. If a stall occurred, bumpCycle does this.
2256  IsResourceLimited =
2257  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2258  getScheduledLatency());
2259 
2260  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2261  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2262  // one cycle. Since we commonly reach the max MOps here, opportunistically
2263  // bump the cycle to avoid uselessly checking everything in the readyQ.
2264  CurrMOps += IncMOps;
2265 
2266  // Bump the cycle count for issue group constraints.
2267  // This must be done after NextCycle has been adjusted for all other stalls.
2268  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2269  // currCycle to X.
2270  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2271  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2272  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2273  << " group\n");
2274  bumpCycle(++NextCycle);
2275  }
2276 
2277  while (CurrMOps >= SchedModel->getIssueWidth()) {
2278  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2279  << CurrCycle << '\n');
2280  bumpCycle(++NextCycle);
2281  }
2282  LLVM_DEBUG(dumpScheduledState());
2283 }
2284 
2285 /// Release pending ready nodes in to the available queue. This makes them
2286 /// visible to heuristics.
2287 void SchedBoundary::releasePending() {
2288  // If the available queue is empty, it is safe to reset MinReadyCycle.
2289  if (Available.empty())
2290  MinReadyCycle = std::numeric_limits<unsigned>::max();
2291 
2292  // Check to see if any of the pending instructions are ready to issue. If
2293  // so, add them to the available queue.
2294  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2295  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2296  SUnit *SU = *(Pending.begin()+i);
2297  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2298 
2299  if (ReadyCycle < MinReadyCycle)
2300  MinReadyCycle = ReadyCycle;
2301 
2302  if (!IsBuffered && ReadyCycle > CurrCycle)
2303  continue;
2304 
2305  if (checkHazard(SU))
2306  continue;
2307 
2308  if (Available.size() >= ReadyListLimit)
2309  break;
2310 
2311  Available.push(SU);
2312  Pending.remove(Pending.begin()+i);
2313  --i; --e;
2314  }
2315  CheckPending = false;
2316 }
2317 
2318 /// Remove SU from the ready set for this boundary.
2319 void SchedBoundary::removeReady(SUnit *SU) {
2320  if (Available.isInQueue(SU))
2321  Available.remove(Available.find(SU));
2322  else {
2323  assert(Pending.isInQueue(SU) && "bad ready count");
2324  Pending.remove(Pending.find(SU));
2325  }
2326 }
2327 
2328 /// If this queue only has one ready candidate, return it. As a side effect,
2329 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2330 /// one node is ready. If multiple instructions are ready, return NULL.
2331 SUnit *SchedBoundary::pickOnlyChoice() {
2332  if (CheckPending)
2333  releasePending();
2334 
2335  if (CurrMOps > 0) {
2336  // Defer any ready instrs that now have a hazard.
2337  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2338  if (checkHazard(*I)) {
2339  Pending.push(*I);
2340  I = Available.remove(I);
2341  continue;
2342  }
2343  ++I;
2344  }
2345  }
2346  for (unsigned i = 0; Available.empty(); ++i) {
2347 // FIXME: Re-enable assert once PR20057 is resolved.
2348 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2349 // "permanent hazard");
2350  (void)i;
2351  bumpCycle(CurrCycle + 1);
2352  releasePending();
2353  }
2354 
2355  LLVM_DEBUG(Pending.dump());
2356  LLVM_DEBUG(Available.dump());
2357 
2358  if (Available.size() == 1)
2359  return *Available.begin();
2360  return nullptr;
2361 }
2362 
2363 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2364 // This is useful information to dump after bumpNode.
2365 // Note that the Queue contents are more useful before pickNodeFromQueue.
2366 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2367  unsigned ResFactor;
2368  unsigned ResCount;
2369  if (ZoneCritResIdx) {
2370  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2371  ResCount = getResourceCount(ZoneCritResIdx);
2372  } else {
2373  ResFactor = SchedModel->getMicroOpFactor();
2374  ResCount = RetiredMOps * ResFactor;
2375  }
2376  unsigned LFactor = SchedModel->getLatencyFactor();
2377  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2378  << " Retired: " << RetiredMOps;
2379  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2380  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2381  << ResCount / ResFactor << " "
2382  << SchedModel->getResourceName(ZoneCritResIdx)
2383  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2384  << (IsResourceLimited ? " - Resource" : " - Latency")
2385  << " limited.\n";
2386 }
2387 #endif
2388 
2389 //===----------------------------------------------------------------------===//
2390 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2391 //===----------------------------------------------------------------------===//
2392 
2393 void GenericSchedulerBase::SchedCandidate::
2394 initResourceDelta(const ScheduleDAGMI *DAG,
2395  const TargetSchedModel *SchedModel) {
2396  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2397  return;
2398 
2399  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2400  for (TargetSchedModel::ProcResIter
2401  PI = SchedModel->getWriteProcResBegin(SC),
2402  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2403  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2404  ResDelta.CritResources += PI->Cycles;
2405  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2406  ResDelta.DemandedResources += PI->Cycles;
2407  }
2408 }
2409 
2410 /// Compute remaining latency. We need this both to determine whether the
2411 /// overall schedule has become latency-limited and whether the instructions
2412 /// outside this zone are resource or latency limited.
2413 ///
2414 /// The "dependent" latency is updated incrementally during scheduling as the
2415 /// max height/depth of scheduled nodes minus the cycles since it was
2416 /// scheduled:
2417 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2418 ///
2419 /// The "independent" latency is the max ready queue depth:
2420 /// ILat = max N.depth for N in Available|Pending
2421 ///
2422 /// RemainingLatency is the greater of independent and dependent latency.
2423 ///
2424 /// These computations are expensive, especially in DAGs with many edges, so
2425 /// only do them if necessary.
2426 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2427  unsigned RemLatency = CurrZone.getDependentLatency();
2428  RemLatency = std::max(RemLatency,
2429  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2430  RemLatency = std::max(RemLatency,
2431  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2432  return RemLatency;
2433 }
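// Editor's note: worked example with assumed values. If the largest
// unscheduled latency among Available nodes is 12, the largest among Pending
// nodes is 9, and the incrementally maintained DependentLatency is 10, the
// function returns max(10, 12, 9) = 12.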
2434 
2435 /// Returns true if the current cycle plus remaining latency is greater than
2436 /// the critical path in the scheduling region.
2437 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2438  SchedBoundary &CurrZone,
2439  bool ComputeRemLatency,
2440  unsigned &RemLatency) const {
2441  // The current cycle is already greater than the critical path, so we are
2442  // already latency limited and don't need to compute the remaining latency.
2443  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2444  return true;
2445 
2446  // If we haven't scheduled anything yet, then we aren't latency limited.
2447  if (CurrZone.getCurrCycle() == 0)
2448  return false;
2449 
2450  if (ComputeRemLatency)
2451  RemLatency = computeRemLatency(CurrZone);
2452 
2453  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2454 }
2455 
2456 /// Set the CandPolicy given a scheduling zone given the current resources and
2457 /// latencies inside and outside the zone.
2458 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2459  SchedBoundary &CurrZone,
2460  SchedBoundary *OtherZone) {
2461  // Apply preemptive heuristics based on the total latency and resources
2462  // inside and outside this zone. Potential stalls should be considered before
2463  // following this policy.
2464 
2465  // Compute the critical resource outside the zone.
2466  unsigned OtherCritIdx = 0;
2467  unsigned OtherCount =
2468  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2469 
2470  bool OtherResLimited = false;
2471  unsigned RemLatency = 0;
2472  bool RemLatencyComputed = false;
2473  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2474  RemLatency = computeRemLatency(CurrZone);
2475  RemLatencyComputed = true;
2476  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2477  OtherCount, RemLatency);
2478  }
2479 
2480  // Schedule aggressively for latency in PostRA mode. We don't check for
2481  // acyclic latency during PostRA, and highly out-of-order processors will
2482  // skip PostRA scheduling.
2483  if (!OtherResLimited &&
2484  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2485  RemLatency))) {
2486  Policy.ReduceLatency |= true;
2487  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2488  << " RemainingLatency " << RemLatency << " + "
2489  << CurrZone.getCurrCycle() << "c > CritPath "
2490  << Rem.CriticalPath << "\n");
2491  }
2492  // If the same resource is limiting inside and outside the zone, do nothing.
2493  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2494  return;
2495 
2496  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2497  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2498  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2499  } if (OtherResLimited) dbgs()
2500  << " RemainingLimit: "
2501  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2502  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2503  << " Latency limited both directions.\n");
2504 
2505  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2506  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2507 
2508  if (OtherResLimited)
2509  Policy.DemandResIdx = OtherCritIdx;
2510 }
2511 
2512 #ifndef NDEBUG
2513 const char *GenericSchedulerBase::getReasonStr(
2514  GenericSchedulerBase::CandReason Reason) {
2515  switch (Reason) {
2516  case NoCand: return "NOCAND ";
2517  case Only1: return "ONLY1 ";
2518  case PhysRegCopy: return "PREG-COPY ";
2519  case RegExcess: return "REG-EXCESS";
2520  case RegCritical: return "REG-CRIT ";
2521  case Stall: return "STALL ";
2522  case Cluster: return "CLUSTER ";
2523  case Weak: return "WEAK ";
2524  case RegMax: return "REG-MAX ";
2525  case ResourceReduce: return "RES-REDUCE";
2526  case ResourceDemand: return "RES-DEMAND";
2527  case TopDepthReduce: return "TOP-DEPTH ";
2528  case TopPathReduce: return "TOP-PATH ";
2529  case BotHeightReduce:return "BOT-HEIGHT";
2530  case BotPathReduce: return "BOT-PATH ";
2531  case NextDefUse: return "DEF-USE ";
2532  case NodeOrder: return "ORDER ";
2533  };
2534  llvm_unreachable("Unknown reason!");
2535 }
2536 
2537 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2538  PressureChange P;
2539  unsigned ResIdx = 0;
2540  unsigned Latency = 0;
2541  switch (Cand.Reason) {
2542  default:
2543  break;
2544  case RegExcess:
2545  P = Cand.RPDelta.Excess;
2546  break;
2547  case RegCritical:
2548  P = Cand.RPDelta.CriticalMax;
2549  break;
2550  case RegMax:
2551  P = Cand.RPDelta.CurrentMax;
2552  break;
2553  case ResourceReduce:
2554  ResIdx = Cand.Policy.ReduceResIdx;
2555  break;
2556  case ResourceDemand:
2557  ResIdx = Cand.Policy.DemandResIdx;
2558  break;
2559  case TopDepthReduce:
2560  Latency = Cand.SU->getDepth();
2561  break;
2562  case TopPathReduce:
2563  Latency = Cand.SU->getHeight();
2564  break;
2565  case BotHeightReduce:
2566  Latency = Cand.SU->getHeight();
2567  break;
2568  case BotPathReduce:
2569  Latency = Cand.SU->getDepth();
2570  break;
2571  }
2572  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2573  if (P.isValid())
2574  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2575  << ":" << P.getUnitInc() << " ";
2576  else
2577  dbgs() << " ";
2578  if (ResIdx)
2579  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2580  else
2581  dbgs() << " ";
2582  if (Latency)
2583  dbgs() << " " << Latency << " cycles ";
2584  else
2585  dbgs() << " ";
2586  dbgs() << '\n';
2587 }
2588 #endif
2589 
2590 namespace llvm {
2591 /// Return true if this heuristic determines order.
2592 bool tryLess(int TryVal, int CandVal,
2593  GenericSchedulerBase::SchedCandidate &TryCand,
2594  GenericSchedulerBase::SchedCandidate &Cand,
2595  GenericSchedulerBase::CandReason Reason) {
2596  if (TryVal < CandVal) {
2597  TryCand.Reason = Reason;
2598  return true;
2599  }
2600  if (TryVal > CandVal) {
2601  if (Cand.Reason > Reason)
2602  Cand.Reason = Reason;
2603  return true;
2604  }
2605  return false;
2606 }
2607 
2608 bool tryGreater(int TryVal, int CandVal,
2609  GenericSchedulerBase::SchedCandidate &TryCand,
2610  GenericSchedulerBase::SchedCandidate &Cand,
2611  GenericSchedulerBase::CandReason Reason) {
2612  if (TryVal > CandVal) {
2613  TryCand.Reason = Reason;
2614  return true;
2615  }
2616  if (TryVal < CandVal) {
2617  if (Cand.Reason > Reason)
2618  Cand.Reason = Reason;
2619  return true;
2620  }
2621  return false;
2622 }
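// Editor's note: usage sketch, not from the source. A caller writes
//   if (tryGreater(TryVal, CandVal, TryCand, Cand, Cluster))
//     return;
// The helpers return true whenever the values differ: if TryVal wins, TryCand
// records Cluster as its reason; if CandVal wins, Cand keeps winning but its
// reason is upgraded to Cluster when Cluster ranks higher (lower enum value).
// A false return means a tie, and the caller falls through to the next
// heuristic.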
2623 
2624 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2625  GenericSchedulerBase::SchedCandidate &Cand,
2626  SchedBoundary &Zone) {
2627  if (Zone.isTop()) {
2628  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2629  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2630  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2631  return true;
2632  }
2633  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2634  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2635  return true;
2636  } else {
2637  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2638  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2639  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2640  return true;
2641  }
2642  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2643  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2644  return true;
2645  }
2646  return false;
2647 }
2648 } // end namespace llvm
2649 
2650 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2651  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2652  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2653 }
2654 
2655 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2656  tracePick(Cand.Reason, Cand.AtTop);
2657 }
2658 
2659 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2660  assert(dag->hasVRegLiveness() &&
2661  "(PreRA)GenericScheduler needs vreg liveness");
2662  DAG = static_cast<ScheduleDAGMILive*>(dag);
2663  SchedModel = DAG->getSchedModel();
2664  TRI = DAG->TRI;
2665 
2666  Rem.init(DAG, SchedModel);
2667  Top.init(DAG, SchedModel, &Rem);
2668  Bot.init(DAG, SchedModel, &Rem);
2669 
2670  // Initialize resource counts.
2671 
2672  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2673  // are disabled, then these HazardRecs will be disabled.
2674  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2675  if (!Top.HazardRec) {
2676  Top.HazardRec =
2677  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2678  Itin, DAG);
2679  }
2680  if (!Bot.HazardRec) {
2681  Bot.HazardRec =
2682  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2683  Itin, DAG);
2684  }
2685  TopCand.SU = nullptr;
2686  BotCand.SU = nullptr;
2687 }
2688 
2689 /// Initialize the per-region scheduling policy.
2690 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2691  MachineBasicBlock::iterator End,
2692  unsigned NumRegionInstrs) {
2693  const MachineFunction &MF = *Begin->getMF();
2694  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2695 
2696  // Avoid setting up the register pressure tracker for small regions to save
2697  // compile time. As a rough heuristic, only track pressure when the number of
2698  // schedulable instructions exceeds half the integer register file.
2699  RegionPolicy.ShouldTrackPressure = true;
2700  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2701  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2702  if (TLI->isTypeLegal(LegalIntVT)) {
2703  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2704  TLI->getRegClassFor(LegalIntVT));
2705  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2706  }
2707  }
2708 
2709  // For generic targets, we default to bottom-up, because it's simpler and more
2710  // compile-time optimizations have been implemented in that direction.
2711  RegionPolicy.OnlyBottomUp = true;
2712 
2713  // Allow the subtarget to override default policy.
2714  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2715 
2716  // After subtarget overrides, apply command line options.
2717  if (!EnableRegPressure)
2718  RegionPolicy.ShouldTrackPressure = false;
2719 
2720  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2721  // e.g. -misched-bottomup=false allows scheduling in both directions.
2722  assert((!ForceTopDown || !ForceBottomUp) &&
2723  "-misched-topdown incompatible with -misched-bottomup");
2724  if (ForceBottomUp.getNumOccurrences() > 0) {
2725  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2726  if (RegionPolicy.OnlyBottomUp)
2727  RegionPolicy.OnlyTopDown = false;
2728  }
2729  if (ForceTopDown.getNumOccurrences() > 0) {
2730  RegionPolicy.OnlyTopDown = ForceTopDown;
2731  if (RegionPolicy.OnlyTopDown)
2732  RegionPolicy.OnlyBottomUp = false;
2733  }
2734 }
2735 
2736 void GenericScheduler::dumpPolicy() const {
2737  // Cannot completely remove virtual function even in release mode.
2738 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2739  dbgs() << "GenericScheduler RegionPolicy: "
2740  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2741  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2742  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2743  << "\n";
2744 #endif
2745 }
2746 
2747 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2748 /// critical path by more cycles than it takes to drain the instruction buffer.
2749 /// We estimate an upper bound on in-flight instructions as:
2750 ///
2751 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2752 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2753 /// InFlightResources = InFlightIterations * LoopResources
2754 ///
2755 /// TODO: Check execution resources in addition to IssueCount.
2756 void GenericScheduler::checkAcyclicLatency() {
2757  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2758  return;
2759 
2760  // Scaled number of cycles per loop iteration.
2761  unsigned IterCount =
2762  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2763  Rem.RemIssueCount);
2764  // Scaled acyclic critical path.
2765  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2766  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2767  unsigned InFlightCount =
2768  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2769  unsigned BufferLimit =
2770  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2771 
2772  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2773 
2774  LLVM_DEBUG(
2775  dbgs() << "IssueCycles="
2776  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2777  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2778  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2779  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2780  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2781  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2782 }
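// Editor's note: worked example with assumed values. With CyclicCritPath = 6,
// a latency factor of 1, RemIssueCount = 8 scaled micro-ops, and
// CriticalPath = 30, IterCount = max(6, 8) = 8, AcyclicCount = 30, and
// InFlightCount = ceil(30 * 8 / 8) = 30. With MicroOpBufferSize = 16 and a
// micro-op factor of 1, BufferLimit = 16, so 30 > 16 marks the region as
// acyclic-latency limited.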
2783 
2784 void GenericScheduler::registerRoots() {
2785  Rem.CriticalPath = DAG->ExitSU.getDepth();
2786 
2787  // Some roots may not feed into ExitSU. Check all of them in case.
2788  for (const SUnit *SU : Bot.Available) {
2789  if (SU->getDepth() > Rem.CriticalPath)
2790  Rem.CriticalPath = SU->getDepth();
2791  }
2792  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2793  if (DumpCriticalPathLength) {
2794  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2795  }
2796 
2797  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2798  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2799  checkAcyclicLatency();
2800  }
2801 }
2802 
2803 namespace llvm {
2804 bool tryPressure(const PressureChange &TryP,
2805  const PressureChange &CandP,
2806  GenericSchedulerBase::SchedCandidate &TryCand,
2807  GenericSchedulerBase::SchedCandidate &Cand,
2808  GenericSchedulerBase::CandReason Reason,
2809  const TargetRegisterInfo *TRI,
2810  const MachineFunction &MF) {
2811  // If one candidate decreases and the other increases, go with it.
2812  // Invalid candidates have UnitInc==0.
2813  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2814  Reason)) {
2815  return true;
2816  }
2817  // Do not compare the magnitude of pressure changes between top and bottom
2818  // boundary.
2819  if (Cand.AtTop != TryCand.AtTop)
2820  return false;
2821 
2822  // If both candidates affect the same set in the same boundary, go with the
2823  // smallest increase.
2824  unsigned TryPSet = TryP.getPSetOrMax();
2825  unsigned CandPSet = CandP.getPSetOrMax();
2826  if (TryPSet == CandPSet) {
2827  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2828  Reason);
2829  }
2830 
2831  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2832  std::numeric_limits<int>::max();
2833 
2834  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2835  std::numeric_limits<int>::max();
2836 
2837  // If the candidates are decreasing pressure, reverse priority.
2838  if (TryP.getUnitInc() < 0)
2839  std::swap(TryRank, CandRank);
2840  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2841 }
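// Editor's note: illustrative reading of the rules above. If TryCand decreases
// a pressure set while Cand increases one, the first tryGreater() call decides
// for TryCand immediately; only when both candidates touch the same set in the
// same boundary does the smaller unit increase win, and otherwise the
// target-specific pressure-set score breaks the tie.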
2842 
2843 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2844  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2845 }
2846 
2847 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2848 /// their physreg def/use.
2849 ///
2850 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2851 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2852 /// with the operation that produces or consumes the physreg. We'll do this when
2853 /// regalloc has support for parallel copies.
2854 int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2855  const MachineInstr *MI = SU->getInstr();
2856  if (!MI->isCopy())
2857  return 0;
2858 
2859  unsigned ScheduledOper = isTop ? 1 : 0;
2860  unsigned UnscheduledOper = isTop ? 0 : 1;
2861  // If we have already scheduled the physreg produce/consumer, immediately
2862  // schedule the copy.
2863  if (TargetRegisterInfo::isPhysicalRegister(
2864  MI->getOperand(ScheduledOper).getReg()))
2865  return 1;
2866  // If the physreg is at the boundary, defer it. Otherwise schedule it
2867  // immediately to free the dependent. We can hoist the copy later.
2868  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2869  if (TargetRegisterInfo::isPhysicalRegister(
2870  MI->getOperand(UnscheduledOper).getReg()))
2871  return AtBoundary ? -1 : 1;
2872  return 0;
2873 }
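// Editor's note: illustrative reading, not from the source. If the copy's
// operand on the already-scheduled side of the zone is a physreg, the bias of
// 1 makes tryCandidate() schedule the copy immediately next to its physreg
// def/use; if instead only the operand on the unscheduled side is a physreg
// and the node sits at the region boundary, the bias of -1 defers it.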
2874 } // end namespace llvm
2875 
2876 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2877  bool AtTop,
2878  const RegPressureTracker &RPTracker,
2879  RegPressureTracker &TempTracker) {
2880  Cand.SU = SU;
2881  Cand.AtTop = AtTop;
2882  if (DAG->isTrackingPressure()) {
2883  if (AtTop) {
2884  TempTracker.getMaxDownwardPressureDelta(
2885  Cand.SU->getInstr(),
2886  Cand.RPDelta,
2887  DAG->getRegionCriticalPSets(),
2888  DAG->getRegionPressure().MaxSetPressure);
2889  } else {
2890  if (VerifyScheduling) {
2891  TempTracker.getMaxUpwardPressureDelta(
2892  Cand.SU->getInstr(),
2893  &DAG->getPressureDiff(Cand.SU),
2894  Cand.RPDelta,
2895  DAG->getRegionCriticalPSets(),
2896  DAG->getRegionPressure().MaxSetPressure);
2897  } else {
2898  RPTracker.getUpwardPressureDelta(
2899  Cand.SU->getInstr(),
2900  DAG->getPressureDiff(Cand.SU),
2901  Cand.RPDelta,
2902  DAG->getRegionCriticalPSets(),
2903  DAG->getRegionPressure().MaxSetPressure);
2904  }
2905  }
2906  }
2907  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2908  << " Try SU(" << Cand.SU->NodeNum << ") "
2909  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2910  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2911 }
2912 
2913 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2914 /// hierarchical. This may be more efficient than a graduated cost model because
2915 /// we don't need to evaluate all aspects of the model for each node in the
2916 /// queue. But it's really done to make the heuristics easier to debug and
2917 /// statistically analyze.
2918 ///
2919 /// \param Cand provides the policy and current best candidate.
2920 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2921 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2922 // if Cand is from a different zone than TryCand.
2923 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2924  SchedCandidate &TryCand,
2925  SchedBoundary *Zone) const {
2926  // Initialize the candidate if needed.
2927  if (!Cand.isValid()) {
2928  TryCand.Reason = NodeOrder;
2929  return;
2930  }
2931 
2932  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2933  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2934  TryCand, Cand, PhysRegCopy))
2935  return;
2936 
2937  // Avoid exceeding the target's limit.
2938  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2939  Cand.RPDelta.Excess,
2940  TryCand, Cand, RegExcess, TRI,
2941  DAG->MF))
2942  return;
2943 
2944  // Avoid increasing the max critical pressure in the scheduled region.
2945  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2946  Cand.RPDelta.CriticalMax,
2947  TryCand, Cand, RegCritical, TRI,
2948  DAG->MF))
2949  return;
2950 
2951  // We only compare a subset of features when comparing nodes between
2952  // Top and Bottom boundary. Some properties are simply incomparable, in many
2953  // other instances we should only override the other boundary if something
2954  // is a clear good pick on one boundary. Skip heuristics that are more
2955  // "tie-breaking" in nature.
2956  bool SameBoundary = Zone != nullptr;
2957  if (SameBoundary) {
2958  // For loops that are acyclic path limited, aggressively schedule for
2959  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2960  // heuristics to take precedence.
2961  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2962  tryLatency(TryCand, Cand, *Zone))
2963  return;
2964 
2965  // Prioritize instructions that read unbuffered resources by stall cycles.
2966  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2967  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2968  return;
2969  }
2970 
2971  // Keep clustered nodes together to encourage downstream peephole
2972  // optimizations which may reduce resource requirements.
2973  //
2974  // This is a best effort to set things up for a post-RA pass. Optimizations
2975  // like generating loads of multiple registers should ideally be done within
2976  // the scheduler pass by combining the loads during DAG postprocessing.
2977  const SUnit *CandNextClusterSU =
2978  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2979  const SUnit *TryCandNextClusterSU =
2980  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2981  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2982  Cand.SU == CandNextClusterSU,
2983  TryCand, Cand, Cluster))
2984  return;
2985 
2986  if (SameBoundary) {
2987  // Weak edges are for clustering and other constraints.
2988  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2989  getWeakLeft(Cand.SU, Cand.AtTop),
2990  TryCand, Cand, Weak))
2991  return;
2992  }
2993 
2994  // Avoid increasing the max pressure of the entire region.
2995  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2996  Cand.RPDelta.CurrentMax,
2997  TryCand, Cand, RegMax, TRI,
2998  DAG->MF))
2999  return;
3000 
3001  if (SameBoundary) {
3002  // Avoid critical resource consumption and balance the schedule.
3003  TryCand.initResourceDelta(DAG, SchedModel);
3004  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3005  TryCand, Cand, ResourceReduce))
3006  return;
3007  if (tryGreater(TryCand.ResDelta.DemandedResources,
3008  Cand.ResDelta.DemandedResources,
3009  TryCand, Cand, ResourceDemand))
3010  return;
3011 
3012  // Avoid serializing long latency dependence chains.
3013  // For acyclic path limited loops, latency was already checked above.
3014  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3015  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3016  return;
3017 
3018  // Fall through to original instruction order.
3019  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3020  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3021  TryCand.Reason = NodeOrder;
3022  }
3023  }
3024 }
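// Editor's note: summary of the heuristic order applied above, highest
// priority first: physreg copy bias, register excess, critical register
// pressure, then (same boundary only) acyclic-latency and unbuffered-resource
// stalls, memop clustering, weak edges, max region pressure, critical resource
// reduction, demanded resources, latency, and finally original node order as
// the tie breaker.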
3025 
3026 /// Pick the best candidate from the queue.
3027 ///
3028 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3029 /// DAG building. To adjust for the current scheduling location we need to
3030 /// maintain the number of vreg uses remaining to be top-scheduled.
3031 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3032  const CandPolicy &ZonePolicy,
3033  const RegPressureTracker &RPTracker,
3034  SchedCandidate &Cand) {
3035  // getMaxPressureDelta temporarily modifies the tracker.
3036  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3037 
3038  ReadyQueue &Q = Zone.Available;
3039  for (SUnit *SU : Q) {
3040 
3041  SchedCandidate TryCand(ZonePolicy);
3042  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3043  // Pass SchedBoundary only when comparing nodes from the same boundary.
3044  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3045  tryCandidate(Cand, TryCand, ZoneArg);
3046  if (TryCand.Reason != NoCand) {
3047  // Initialize resource delta if needed in case future heuristics query it.
3048  if (TryCand.ResDelta == SchedResourceDelta())
3049  TryCand.initResourceDelta(DAG, SchedModel);
3050  Cand.setBest(TryCand);
3051  LLVM_DEBUG(traceCandidate(Cand));
3052  }
3053  }
3054 }
3055 
3056 /// Pick the best candidate node from either the top or bottom queue.
3057 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3058  // Schedule as far as possible in the direction of no choice. This is most
3059  // efficient, but also provides the best heuristics for CriticalPSets.
3060  if (SUnit *SU = Bot.pickOnlyChoice()) {
3061  IsTopNode = false;
3062  tracePick(Only1, false);
3063  return SU;
3064  }
3065  if (SUnit *SU = Top.pickOnlyChoice()) {
3066  IsTopNode = true;
3067  tracePick(Only1, true);
3068  return SU;
3069  }
3070  // Set the bottom-up policy based on the state of the current bottom zone and
3071  // the instructions outside the zone, including the top zone.
3072  CandPolicy BotPolicy;
3073  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3074  // Set the top-down policy based on the state of the current top zone and
3075  // the instructions outside the zone, including the bottom zone.
3076  CandPolicy TopPolicy;
3077  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3078 
3079  // See if BotCand is still valid (because we previously scheduled from Top).
3080  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3081  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3082  BotCand.Policy != BotPolicy) {
3083  BotCand.reset(CandPolicy());
3084  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3085  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3086  } else {
3087  LLVM_DEBUG(traceCandidate(BotCand));
3088 #ifndef NDEBUG
3089  if (VerifyScheduling) {
3090  SchedCandidate TCand;
3091  TCand.reset(CandPolicy());
3092  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3093  assert(TCand.SU == BotCand.SU &&
3094  "Last pick result should correspond to re-picking right now");
3095  }
3096 #endif
3097  }
3098 
3099  // Check if the top Q has a better candidate.
3100  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3101  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3102  TopCand.Policy != TopPolicy) {
3103  TopCand.reset(CandPolicy());
3104  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3105  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3106  } else {
3107  LLVM_DEBUG(traceCandidate(TopCand));
3108 #ifndef NDEBUG
3109  if (VerifyScheduling) {
3110  SchedCandidate TCand;
3111  TCand.reset(CandPolicy());
3112  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3113  assert(TCand.SU == TopCand.SU &&
3114  "Last pick result should correspond to re-picking right now");
3115  }
3116 #endif
3117  }
3118 
3119  // Pick best from BotCand and TopCand.
3120  assert(BotCand.isValid());
3121  assert(TopCand.isValid());
3122  SchedCandidate Cand = BotCand;
3123  TopCand.Reason = NoCand;
3124  tryCandidate(Cand, TopCand, nullptr);
3125  if (TopCand.Reason != NoCand) {
3126  Cand.setBest(TopCand);
3127  LLVM_DEBUG(traceCandidate(Cand));
3128  }
3129 
3130  IsTopNode = Cand.AtTop;
3131  tracePick(Cand);
3132  return Cand.SU;
3133 }
3134 
3135 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3136 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3137  if (DAG->top() == DAG->bottom()) {
3138  assert(Top.Available.empty() && Top.Pending.empty() &&
3139  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3140  return nullptr;
3141  }
3142  SUnit *SU;
3143  do {
3144  if (RegionPolicy.OnlyTopDown) {
3145  SU = Top.pickOnlyChoice();
3146  if (!SU) {
3147  CandPolicy NoPolicy;
3148  TopCand.reset(NoPolicy);
3149  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3150  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3151  tracePick(TopCand);
3152  SU = TopCand.SU;
3153  }
3154  IsTopNode = true;
3155  } else if (RegionPolicy.OnlyBottomUp) {
3156  SU = Bot.pickOnlyChoice();
3157  if (!SU) {
3158  CandPolicy NoPolicy;
3159  BotCand.reset(NoPolicy);
3160  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3161  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3162  tracePick(BotCand);
3163  SU = BotCand.SU;
3164  }
3165  IsTopNode = false;
3166  } else {
3167  SU = pickNodeBidirectional(IsTopNode);
3168  }
3169  } while (SU->isScheduled);
3170 
3171  if (SU->isTopReady())
3172  Top.removeReady(SU);
3173  if (SU->isBottomReady())
3174  Bot.removeReady(SU);
3175 
3176  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3177  << *SU->getInstr());
3178  return SU;
3179 }
3180 
3181 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3182  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3183  if (!isTop)
3184  ++InsertPos;
3185  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3186 
3187  // Find already scheduled copies with a single physreg dependence and move
3188  // them just above the scheduled instruction.
3189  for (SDep &Dep : Deps) {
3190  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3191  continue;
3192  SUnit *DepSU = Dep.getSUnit();
3193  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3194  continue;
3195  MachineInstr *Copy = DepSU->getInstr();
3196  if (!Copy->isCopy())
3197  continue;
3198  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3199  DAG->dumpNode(*Dep.getSUnit()));
3200  DAG->moveInstruction(Copy, InsertPos);
3201  }
3202 }
3203 
3204 /// Update the scheduler's state after scheduling a node. This is the same node
3205 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3206 /// update its state based on the current cycle before MachineSchedStrategy
3207 /// does.
3208 ///
3209 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3210 /// them here. See comments in biasPhysRegCopy.
3211 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3212  if (IsTopNode) {
3213  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3214  Top.bumpNode(SU);
3215  if (SU->hasPhysRegUses)
3216  reschedulePhysRegCopies(SU, true);
3217  } else {
3218  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3219  Bot.bumpNode(SU);
3220  if (SU->hasPhysRegDefs)
3221  reschedulePhysRegCopies(SU, false);
3222  }
3223 }
3224 
3225 /// Create the standard converging machine scheduler. This will be used as the
3226 /// default scheduler if the target does not set a default.
3227 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3228  ScheduleDAGMILive *DAG =
3229  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3230  // Register DAG post-processors.
3231  //
3232  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3233  // data and pass it to later mutations. Have a single mutation that gathers
3234  // the interesting nodes in one pass.
3235  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3236  return DAG;
3237 }
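(Illustrative aside, not part of MachineScheduler.cpp: a target's createMachineScheduler hook can reuse createGenericSchedLive and then layer on further DAG post-processing through addMutation. The factory name below is hypothetical; createGenericSchedLive, addMutation, and createLoadClusterDAGMutation are the helpers this file already uses.)

static ScheduleDAGInstrs *createMyTargetSchedLive(MachineSchedContext *C) {
  // Start from the standard converging scheduler created above.
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Attach an extra post-processing step, e.g. clustering of loads.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}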
3238 
3239 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3240  return createGenericSchedLive(C);
3241 }
3242 
3243 static MachineSchedRegistry
3244 GenericSchedRegistry("converge", "Standard converging scheduler.",
3245  createConveringSched);
3246 
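(Sketch only, with hypothetical names: an out-of-tree MachineSchedStrategy could be hooked up through the same MachineSchedRegistry mechanism used for the in-tree schedulers in this file, making it selectable by name.)

static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  // MyStrategy stands in for a hypothetical MachineSchedStrategy subclass.
  return new ScheduleDAGMILive(C, llvm::make_unique<MyStrategy>(C));
}
static MachineSchedRegistry
MySchedRegistry("my-sched", "Example custom converging scheduler.",
                createMySched);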
3247 //===----------------------------------------------------------------------===//
3248 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3249 //===----------------------------------------------------------------------===//
3250 
3251 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3252  DAG = Dag;
3253  SchedModel = DAG->getSchedModel();
3254  TRI = DAG->TRI;
3255 
3256  Rem.init(DAG, SchedModel);
3257  Top.init(DAG, SchedModel, &Rem);
3258  BotRoots.clear();
3259 
3260  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3261  // or are disabled, then these HazardRecs will be disabled.
3262  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3263  if (!Top.HazardRec) {
3264  Top.HazardRec =
3265  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3266  Itin, DAG);
3267  }
3268 }
3269 
3270 void PostGenericScheduler::registerRoots() {
3271  Rem.CriticalPath = DAG->ExitSU.getDepth();
3272 
3273  // Some roots may not feed into ExitSU. Check all of them in case.
3274  for (const SUnit *SU : BotRoots) {
3275  if (SU->getDepth() > Rem.CriticalPath)
3276  Rem.CriticalPath = SU->getDepth();
3277  }
3278  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3279  if (DumpCriticalPathLength) {
3280  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3281  }
3282 }
3283 
3284 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3285 ///
3286 /// \param Cand provides the policy and current best candidate.
3287 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3288 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3289  SchedCandidate &TryCand) {
3290  // Initialize the candidate if needed.
3291  if (!Cand.isValid()) {
3292  TryCand.Reason = NodeOrder;
3293  return;
3294  }
3295 
3296  // Prioritize instructions that read unbuffered resources by stall cycles.
3297  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3298  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3299  return;
3300 
3301  // Keep clustered nodes together.
3302  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3303  Cand.SU == DAG->getNextClusterSucc(),
3304  TryCand, Cand, Cluster))
3305  return;
3306 
3307  // Avoid critical resource consumption and balance the schedule.
3308  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3309  TryCand, Cand, ResourceReduce))
3310  return;
3311  if (tryGreater(TryCand.ResDelta.DemandedResources,
3312  Cand.ResDelta.DemandedResources,
3313  TryCand, Cand, ResourceDemand))
3314  return;
3315 
3316  // Avoid serializing long latency dependence chains.
3317  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3318  return;
3319  }
3320 
3321  // Fall through to original instruction order.
3322  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3323  TryCand.Reason = NodeOrder;
3324 }
3325 
3326 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3327  ReadyQueue &Q = Top.Available;
3328  for (SUnit *SU : Q) {
3329  SchedCandidate TryCand(Cand.Policy);
3330  TryCand.SU = SU;
3331  TryCand.AtTop = true;
3332  TryCand.initResourceDelta(DAG, SchedModel);
3333  tryCandidate(Cand, TryCand);
3334  if (TryCand.Reason != NoCand) {
3335  Cand.setBest(TryCand);
3336  LLVM_DEBUG(traceCandidate(Cand));
3337  }
3338  }
3339 }
3340 
3341 /// Pick the next node to schedule.
3342 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3343  if (DAG->top() == DAG->bottom()) {
3344  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3345  return nullptr;
3346  }
3347  SUnit *SU;
3348  do {
3349  SU = Top.pickOnlyChoice();
3350  if (SU) {
3351  tracePick(Only1, true);
3352  } else {
3353  CandPolicy NoPolicy;
3354  SchedCandidate TopCand(NoPolicy);
3355  // Set the top-down policy based on the state of the current top zone and
3356  // the instructions outside the zone, including the bottom zone.
3357  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3358  pickNodeFromQueue(TopCand);
3359  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3360  tracePick(TopCand);
3361  SU = TopCand.SU;
3362  }
3363  } while (SU->isScheduled);
3364 
3365  IsTopNode = true;
3366  Top.removeReady(SU);
3367 
3368  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3369  << *SU->getInstr());
3370  return SU;
3371 }
3372 
3373 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3374 /// scheduled/remaining flags in the DAG nodes.
3375 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3376  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3377  Top.bumpNode(SU);
3378 }
3379 
3380 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3381  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3382  /*RemoveKillFlags=*/true);
3383 }
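(Hedged sketch, not from this file: the post-RA machine scheduler only runs for subtargets that opt in, typically by overriding the enablePostRAScheduler hook; the subtarget class name is hypothetical.)

bool MyTargetSubtarget::enablePostRAScheduler() const {
  // Opt this subtarget in to the PostMachineScheduler pass, which falls back
  // to createGenericSchedPostRA unless the target supplies its own scheduler.
  return true;
}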
3384 
3385 //===----------------------------------------------------------------------===//
3386 // ILP Scheduler. Currently for experimental analysis of heuristics.
3387 //===----------------------------------------------------------------------===//
3388 
3389 namespace {
3390 
3391 /// Order nodes by the ILP metric.
3392 struct ILPOrder {
3393  const SchedDFSResult *DFSResult = nullptr;
3394  const BitVector *ScheduledTrees = nullptr;
3395  bool MaximizeILP;
3396 
3397  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3398 
3399  /// Apply a less-than relation on node priority.
3400  ///
3401  /// (Return true if A comes after B in the Q.)
3402  bool operator()(const SUnit *A, const SUnit *B) const {
3403  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3404  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3405  if (SchedTreeA != SchedTreeB) {
3406  // Unscheduled trees have lower priority.
3407  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3408  return ScheduledTrees->test(SchedTreeB);
3409 
3410  // Trees with shallower connections have lower priority.
3411  if (DFSResult->getSubtreeLevel(SchedTreeA)
3412  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3413  return DFSResult->getSubtreeLevel(SchedTreeA)
3414  < DFSResult->getSubtreeLevel(SchedTreeB);
3415  }
3416  }
3417  if (MaximizeILP)
3418  return DFSResult->getILP(A) < DFSResult->getILP(B);
3419  else
3420  return DFSResult->getILP(A) > DFSResult->getILP(B);
3421  }
3422 };
3423 
3424 /// Schedule based on the ILP metric.
3425 class ILPScheduler : public MachineSchedStrategy {
3426  ScheduleDAGMILive *DAG = nullptr;
3427  ILPOrder Cmp;
3428 
3429  std::vector<SUnit*> ReadyQ;
3430 
3431 public:
3432  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3433 
3434  void initialize(ScheduleDAGMI *dag) override {
3435  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3436  DAG = static_cast<ScheduleDAGMILive*>(dag);
3437  DAG->computeDFSResult();
3438  Cmp.DFSResult = DAG->getDFSResult();
3439  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3440  ReadyQ.clear();
3441  }
3442 
3443  void registerRoots() override {
3444  // Restore the heap in ReadyQ with the updated DFS results.
3445  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3446  }
3447 
3448  /// Implement MachineSchedStrategy interface.
3449  /// -----------------------------------------
3450 
3451  /// Callback to select the highest priority node from the ready Q.
3452  SUnit *pickNode(bool &IsTopNode) override {
3453  if (ReadyQ.empty()) return nullptr;
3454  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3455  SUnit *SU = ReadyQ.back();
3456  ReadyQ.pop_back();
3457  IsTopNode = false;
3458  LLVM_DEBUG(dbgs() << "Pick node "
3459  << "SU(" << SU->NodeNum << ") "
3460  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3461  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3462  << " @"
3463  << DAG->getDFSResult()->getSubtreeLevel(
3464  DAG->getDFSResult()->getSubtreeID(SU))
3465  << '\n'
3466  << "Scheduling " << *SU->getInstr());
3467  return SU;
3468  }
3469 
3470  /// Scheduler callback to notify that a new subtree is scheduled.
3471  void scheduleTree(unsigned SubtreeID) override {
3472  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3473  }
3474 
3475  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3476  /// DFSResults, and resort the priority Q.
3477  void schedNode(SUnit *SU, bool IsTopNode) override {
3478  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3479  }
3480 
3481  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3482 
3483  void releaseBottomNode(SUnit *SU) override {
3484  ReadyQ.push_back(SU);
3485  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3486  }
3487 };
3488 
3489 } // end anonymous namespace
3490 
3491 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3492  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3493 }
3494 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3495  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3496 }
3497 
3499  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3501  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
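(Usage note, assuming the usual command-line plumbing through the MachineSchedOpt registry parser defined earlier in this file: schedulers registered this way can be selected by name, e.g. by passing -misched=ilpmax or -misched=ilpmin to llc.)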
3502 
3503 //===----------------------------------------------------------------------===//
3504 // Machine Instruction Shuffler for Correctness Testing
3505 //===----------------------------------------------------------------------===//
3506 
3507 #ifndef NDEBUG
3508 namespace {
3509 
3510 /// Apply a less-than relation on the node order, which corresponds to the
3511 /// instruction order prior to scheduling. IsReverse implements greater-than.
3512 template<bool IsReverse>
3513 struct SUnitOrder {
3514  bool operator()(SUnit *A, SUnit *B) const {
3515  if (IsReverse)
3516  return A->NodeNum > B->NodeNum;
3517  else
3518  return A->NodeNum < B->NodeNum;
3519  }
3520 };
3521 
3522 /// Reorder instructions as much as possible.
3523 class InstructionShuffler : public MachineSchedStrategy {
3524  bool IsAlternating;
3525  bool IsTopDown;
3526 
3527  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3528  // gives nodes with a higher number higher priority causing the latest
3529  // instructions to be scheduled first.
3530  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3531  TopQ;
3532 
3533  // When scheduling bottom-up, use greater-than as the queue priority.
3534  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3535  BottomQ;
3536 
3537 public:
3538  InstructionShuffler(bool alternate, bool topdown)
3539  : IsAlternating(alternate), IsTopDown(topdown) {}
3540 
3541  void initialize(ScheduleDAGMI*) override {
3542  TopQ.clear();
3543  BottomQ.clear();
3544  }
3545 
3546  /// Implement MachineSchedStrategy interface.
3547  /// -----------------------------------------
3548 
3549  SUnit *pickNode(bool &IsTopNode) override {
3550  SUnit *SU;
3551  if (IsTopDown) {
3552  do {
3553  if (TopQ.empty()) return nullptr;
3554  SU = TopQ.top();
3555  TopQ.pop();
3556  } while (SU->isScheduled);
3557  IsTopNode = true;
3558  } else {
3559  do {
3560  if (BottomQ.empty()) return nullptr;
3561  SU = BottomQ.top();
3562  BottomQ.pop();
3563  } while (SU->isScheduled);
3564  IsTopNode = false;
3565  }
3566  if (IsAlternating)
3567  IsTopDown = !IsTopDown;
3568  return SU;
3569  }
3570 
3571  void schedNode(SUnit *SU, bool IsTopNode) override {}
3572 
3573  void releaseTopNode(SUnit *SU) override {
3574  TopQ.push(SU);
3575  }
3576  void releaseBottomNode(SUnit *SU) override {
3577  BottomQ.push(SU);
3578  }
3579 };
3580 
3581 } // end anonymous namespace
3582 
3583 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3584  bool Alternate = !ForceTopDown && !ForceBottomUp;
3585  bool TopDown = !ForceBottomUp;
3586  assert((TopDown || !ForceTopDown) &&
3587  "-misched-topdown incompatible with -misched-bottomup");
3588  return new ScheduleDAGMILive(
3589  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3590 }
3591 
3593  "shuffle", "Shuffle machine instructions alternating directions",
3595 #endif // !NDEBUG
3596 
3597 //===----------------------------------------------------------------------===//
3598 // GraphWriter support for ScheduleDAGMILive.
3599 //===----------------------------------------------------------------------===//
3600 
3601 #ifndef NDEBUG
3602 namespace llvm {
3603 
3604 template<> struct GraphTraits<
3605  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3606 
3607 template<>
3608 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3609  DOTGraphTraits(bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3610 
3611  static std::string getGraphName(const ScheduleDAG *G) {
3612  return G->MF.getName();
3613  }
3614 
3615  static bool renderGraphFromBottomUp() {
3616  return true;
3617  }
3618 
3619  static bool isNodeHidden(const SUnit *Node) {
3620  if (ViewMISchedCutoff == 0)
3621  return false;
3622  return (Node->Preds.size() > ViewMISchedCutoff
3623  || Node->Succs.size() > ViewMISchedCutoff);
3624  }
3625 
3626  /// If you want to override the dot attributes printed for a particular
3627  /// edge, override this method.
3628  static std::string getEdgeAttributes(const SUnit *Node,
3629  SUnitIterator EI,
3630  const ScheduleDAG *Graph) {
3631  if (EI.isArtificialDep())
3632  return "color=cyan,style=dashed";
3633  if (EI.isCtrlDep())
3634  return "color=blue,style=dashed";
3635  return "";
3636  }
3637 
3638  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3639  std::string Str;
3640  raw_string_ostream SS(Str);
3641  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3642  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3643  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3644  SS << "SU:" << SU->NodeNum;
3645  if (DFS)
3646  SS << " I:" << DFS->getNumInstrs(SU);
3647  return SS.str();
3648  }
3649 
3650  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3651  return G->getGraphNodeLabel(SU);
3652  }
3653 
3654  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3655  std::string Str("shape=Mrecord");
3656  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3657  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3658  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3659  if (DFS) {
3660  Str += ",style=filled,fillcolor=\"#";
3661  Str += DOT::getColorString(DFS->getSubtreeID(N));
3662  Str += '"';
3663  }
3664  return Str;
3665  }
3666 };
3667 
3668 } // end namespace llvm
3669 #endif // NDEBUG
3670 
3671 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3672 /// rendered using 'dot'.
3673 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3674 #ifndef NDEBUG
3675  ViewGraph(this, Name, false, Title);
3676 #else
3677  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3678  << "systems with Graphviz or gv!\n";
3679 #endif // NDEBUG
3680 }
3681 
3682 /// Out-of-line implementation with no arguments is handy for gdb.
3683 void ScheduleDAGMI::viewGraph() {
3684  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3685 }