MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
51#include "llvm/Config/llvm-config.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
57#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
80cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81 cl::desc("Force top-down list scheduling"));
82cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83 cl::desc("Force bottom-up list scheduling"));
84namespace MISchedPostRASched {
85enum Direction {
86 TopDown,
87 BottomUp,
88};
89} // end namespace MISchedPostRASched
91 "misched-postra-direction", cl::Hidden,
92 cl::desc("Post reg-alloc list scheduling direction"),
93 // Default to top-down because it was implemented first and existing targets
94 // expect that behavior by default.
97 "Force top-down post reg-alloc list scheduling"),
99 "Force bottom-up post reg-alloc list scheduling")));
102 cl::desc("Print critical path length to stdout"));
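// The direction option above is exposed on the llc command line; for
// example, "llc -misched-postra-direction=bottomup ..." forces bottom-up
// post reg-alloc list scheduling (illustrative invocation; any cl::opt in
// this file can be set the same way).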
103
105 "verify-misched", cl::Hidden,
106 cl::desc("Verify machine instrs before and after machine scheduling"));
107
108#ifndef NDEBUG
110 "view-misched-dags", cl::Hidden,
111 cl::desc("Pop up a window to show MISched dags after they are processed"));
112cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
113 cl::desc("Print schedule DAGs"));
115 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
116 cl::desc("Dump resource usage at schedule boundary."));
118 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
119 cl::desc("Show details of invoking getNextResoufceCycle."));
120#else
121const bool ViewMISchedDAGs = false;
122const bool PrintDAGs = false;
123const bool MischedDetailResourceBooking = false;
124#ifdef LLVM_ENABLE_DUMP
125const bool MISchedDumpReservedCycles = false;
126#endif // LLVM_ENABLE_DUMP
127#endif // NDEBUG
128
129} // end namespace llvm
130
131#ifndef NDEBUG
132/// In some situations a few uninteresting nodes depend on nearly all other
133/// nodes in the graph; provide a cutoff to hide them.
134static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
135 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
136
138 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
139
141 cl::desc("Only schedule this function"));
142static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
143 cl::desc("Only schedule this MBB#"));
144#endif // NDEBUG
145
146/// Avoid quadratic complexity in unusually large basic blocks by limiting the
147/// size of the ready lists.
149 cl::desc("Limit ready list to N instructions"), cl::init(256));
150
151static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
152 cl::desc("Enable register pressure scheduling."), cl::init(true));
153
154static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
155 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
156
158 cl::desc("Enable memop clustering."),
159 cl::init(true));
160static cl::opt<bool>
161 ForceFastCluster("force-fast-cluster", cl::Hidden,
162 cl::desc("Switch to fast cluster algorithm with the lost "
163 "of some fusion opportunities"),
164 cl::init(false));
165static cl::opt<unsigned>
166 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
167 cl::desc("The threshold for fast cluster"),
168 cl::init(1000));
169
170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
173 cl::desc("Dump resource usage at schedule boundary."));
174static cl::opt<unsigned>
175 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
176 cl::desc("Set width of the columns with "
177 "the resources and schedule units"),
178 cl::init(19));
179static cl::opt<unsigned>
180 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
181 cl::desc("Set width of the columns showing resource booking."),
182 cl::init(5));
184 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
185 cl::desc("Sort the resources printed in the dump trace"));
186#endif
187
188static cl::opt<unsigned>
189 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
190 cl::desc("Number of intervals to track"), cl::init(10));
191
192// DAG subtrees must have at least this many nodes.
193static const unsigned MinSubtreeSize = 8;
194
195// Pin the vtables to this file.
196void MachineSchedStrategy::anchor() {}
197
198void ScheduleDAGMutation::anchor() {}
199
200//===----------------------------------------------------------------------===//
201// Machine Instruction Scheduling Pass and Registry
202//===----------------------------------------------------------------------===//
203
204MachineSchedContext::MachineSchedContext() {
205 RegClassInfo = new RegisterClassInfo();
206}
207
208MachineSchedContext::~MachineSchedContext() {
209 delete RegClassInfo;
210}
211
212namespace {
213
214/// Base class for a machine scheduler class that can run at any point.
215class MachineSchedulerBase : public MachineSchedContext,
216 public MachineFunctionPass {
217public:
218 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
219
220 void print(raw_ostream &O, const Module* = nullptr) const override;
221
222protected:
223 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
224};
225
226/// MachineScheduler runs after coalescing and before register allocation.
227class MachineScheduler : public MachineSchedulerBase {
228public:
229 MachineScheduler();
230
231 void getAnalysisUsage(AnalysisUsage &AU) const override;
232
233 bool runOnMachineFunction(MachineFunction&) override;
234
235 static char ID; // Class identification, replacement for typeinfo
236
237protected:
238 ScheduleDAGInstrs *createMachineScheduler();
239};
240
241/// PostMachineScheduler runs shortly before code emission.
242class PostMachineScheduler : public MachineSchedulerBase {
243public:
244 PostMachineScheduler();
245
246 void getAnalysisUsage(AnalysisUsage &AU) const override;
247
248 bool runOnMachineFunction(MachineFunction&) override;
249
250 static char ID; // Class identification, replacement for typeinfo
251
252protected:
253 ScheduleDAGInstrs *createPostMachineScheduler();
254};
255
256} // end anonymous namespace
257
258char MachineScheduler::ID = 0;
259
260char &llvm::MachineSchedulerID = MachineScheduler::ID;
261
263 "Machine Instruction Scheduler", false, false)
271
272MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
273 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
274}
275
276void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
277 AU.setPreservesCFG();
278 AU.addRequired<MachineDominatorTree>();
279 AU.addRequired<MachineLoopInfo>();
280 AU.addRequired<AAResultsWrapperPass>();
281 AU.addRequired<TargetPassConfig>();
282 AU.addRequired<SlotIndexes>();
283 AU.addPreserved<SlotIndexes>();
284 AU.addRequired<LiveIntervals>();
285 AU.addPreserved<LiveIntervals>();
287}
288
289char PostMachineScheduler::ID = 0;
290
291char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
292
293INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
294 "PostRA Machine Instruction Scheduler", false, false)
295INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
296INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
297INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
298INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
299 "PostRA Machine Instruction Scheduler", false, false)
300
301PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
302 initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
303}
304
305void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
306 AU.setPreservesCFG();
307 AU.addRequired<MachineLoopInfo>();
308 AU.addRequired<AAResultsWrapperPass>();
309 AU.addRequired<TargetPassConfig>();
312}
313
314MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
315 MachineSchedRegistry::Registry;
316
317/// A dummy default scheduler factory indicates whether the scheduler
318/// is overridden on the command line.
319static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
320 return nullptr;
321}
322
323/// MachineSchedOpt allows command line selection of the scheduler.
328 cl::desc("Machine instruction scheduler to use"));
329
330static MachineSchedRegistry
331DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
332 useDefaultMachineSched);
333
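// A new strategy can be made selectable via -misched=<name> by pairing a
// factory with a registry entry. A minimal sketch (the strategy class
// "CustomStrategy" is hypothetical; ScheduleDAGMILive and
// MachineSchedRegistry are the real types used throughout this file):
//
//   static ScheduleDAGInstrs *createCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<CustomStrategy>(C));
//   }
//   static MachineSchedRegistry
//   CustomSchedRegistry("custom", "Run my custom scheduler.",
//                       createCustomSched);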
335 "enable-misched",
336 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
337 cl::Hidden);
338
340 "enable-post-misched",
341 cl::desc("Enable the post-ra machine instruction scheduling pass."),
342 cl::init(true), cl::Hidden);
343
344/// Decrement this iterator until reaching the top or a non-debug instr.
345static MachineBasicBlock::const_iterator
346priorNonDebug(MachineBasicBlock::const_iterator I,
347 MachineBasicBlock::const_iterator Beg) {
348 assert(I != Beg && "reached the top of the region, cannot decrement");
349 while (--I != Beg) {
350 if (!I->isDebugOrPseudoInstr())
351 break;
352 }
353 return I;
354}
355
356/// Non-const version.
357static MachineBasicBlock::iterator
358priorNonDebug(MachineBasicBlock::iterator I,
359 MachineBasicBlock::iterator Beg) {
360 return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
361 .getNonConstIterator();
362}
363
364/// If this iterator is a debug value, increment until reaching the End or a
365/// non-debug instruction.
366static MachineBasicBlock::const_iterator
367nextIfDebug(MachineBasicBlock::const_iterator I,
368 MachineBasicBlock::const_iterator End) {
369 for (; I != End; ++I) {
370 if (!I->isDebugOrPseudoInstr())
371 break;
372 }
373 return I;
374}
375
376/// Non-const version.
377static MachineBasicBlock::iterator
378nextIfDebug(MachineBasicBlock::iterator I,
379 MachineBasicBlock::iterator End) {
380 return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
381 .getNonConstIterator();
382}
383
384/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
385ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
386 // Select the scheduler, or set the default.
387 MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
388 if (Ctor != useDefaultMachineSched)
389 return Ctor(this);
390
391 // Get the default scheduler set by the target for this function.
392 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
393 if (Scheduler)
394 return Scheduler;
395
396 // Default to GenericScheduler.
397 return createGenericSchedLive(this);
398}
399
400/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
401/// the caller. We don't have a command line option to override the postRA
402/// scheduler. The Target must configure it.
403ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
404 // Get the postRA scheduler set by the target for this function.
405 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
406 if (Scheduler)
407 return Scheduler;
408
409 // Default to GenericScheduler.
410 return createGenericSchedPostRA(this);
411}
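// A target opts into (or customizes) post-RA scheduling by overriding the
// hook on its pass config. A minimal sketch ("MyPassConfig" is a
// hypothetical TargetPassConfig subclass; createGenericSchedPostRA is the
// default used above):
//
//   ScheduleDAGInstrs *
//   MyPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return createGenericSchedPostRA(C);
//   }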
412
413/// Top-level MachineScheduler pass driver.
414///
415/// Visit blocks in function order. Divide each block into scheduling regions
416/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
417/// consistent with the DAG builder, which traverses the interior of the
418/// scheduling regions bottom-up.
419///
420/// This design avoids exposing scheduling boundaries to the DAG builder,
421/// simplifying the DAG builder's support for "special" target instructions.
422/// At the same time the design allows target schedulers to operate across
423/// scheduling boundaries, for example to bundle the boundary instructions
424/// without reordering them. This creates complexity, because the target
425/// scheduler must update the RegionBegin and RegionEnd positions cached by
426/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
427/// design would be to split blocks at scheduling boundaries, but LLVM has a
428/// general bias against block splitting purely for implementation simplicity.
429bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
430 if (skipFunction(mf.getFunction()))
431 return false;
432
433 if (EnableMachineSched.getNumOccurrences()) {
434 if (!EnableMachineSched)
435 return false;
436 } else if (!mf.getSubtarget().enableMachineScheduler())
437 return false;
438
439 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
440
441 // Initialize the context of the pass.
442 MF = &mf;
443 MLI = &getAnalysis<MachineLoopInfo>();
444 MDT = &getAnalysis<MachineDominatorTree>();
445 PassConfig = &getAnalysis<TargetPassConfig>();
446 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
447
448 LIS = &getAnalysis<LiveIntervals>();
449
450 if (VerifyScheduling) {
451 LLVM_DEBUG(LIS->dump());
452 MF->verify(this, "Before machine scheduling.");
453 }
454 RegClassInfo->runOnMachineFunction(*MF);
455
456 // Instantiate the selected scheduler for this target, function, and
457 // optimization level.
458 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
459 ScheduleDAGMI::DumpDirection D;
460 if (ForceTopDown)
461 D = ScheduleDAGMI::DumpDirection::TopDown;
462 else if (ForceBottomUp)
463 D = ScheduleDAGMI::DumpDirection::BottomUp;
464 else
465 D = ScheduleDAGMI::DumpDirection::Bidirectional;
466 Scheduler->setDumpDirection(D);
467 scheduleRegions(*Scheduler, false);
468
469 LLVM_DEBUG(LIS->dump());
470 if (VerifyScheduling)
471 MF->verify(this, "After machine scheduling.");
472 return true;
473}
474
475bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
476 if (skipFunction(mf.getFunction()))
477 return false;
478
479 if (EnablePostRAMachineSched.getNumOccurrences()) {
480 if (!EnablePostRAMachineSched)
481 return false;
482 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
483 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
484 return false;
485 }
486 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
487
488 // Initialize the context of the pass.
489 MF = &mf;
490 MLI = &getAnalysis<MachineLoopInfo>();
491 PassConfig = &getAnalysis<TargetPassConfig>();
492 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
493
494 if (VerifyScheduling)
495 MF->verify(this, "Before post machine scheduling.");
496
497 // Instantiate the selected scheduler for this target, function, and
498 // optimization level.
499 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
500 ScheduleDAGMI::DumpDirection D;
501 if (PostRADirection == MISchedPostRASched::TopDown)
502 D = ScheduleDAGMI::DumpDirection::TopDown;
503 else
504 D = ScheduleDAGMI::DumpDirection::BottomUp;
505 Scheduler->setDumpDirection(D);
506 scheduleRegions(*Scheduler, true);
507
508 if (VerifyScheduling)
509 MF->verify(this, "After post machine scheduling.");
510 return true;
511}
512
513/// Return true if the given instruction should not be included in a scheduling
514/// region.
515///
516/// MachineScheduler does not currently support scheduling across calls. To
517/// handle calls, the DAG builder needs to be modified to create register
518/// anti/output dependencies on the registers clobbered by the call's regmask
519/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
520/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
521/// the boundary, but there would be no benefit to postRA scheduling across
522/// calls this late anyway.
523static bool isSchedBoundary(MachineBasicBlock::iterator MI,
524 MachineBasicBlock *MBB,
525 MachineFunction *MF,
526 const TargetInstrInfo *TII) {
527 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
528}
529
530/// A region of an MBB for scheduling.
531namespace {
532struct SchedRegion {
533 /// RegionBegin is the first instruction in the scheduling region, and
534 /// RegionEnd is either MBB->end() or the scheduling boundary after the
535 /// last instruction in the scheduling region. These iterators cannot refer
536 /// to instructions outside of the identified scheduling region because
537 /// those may be reordered before scheduling this region.
538 MachineBasicBlock::iterator RegionBegin;
539 MachineBasicBlock::iterator RegionEnd;
540 unsigned NumRegionInstrs;
541
542 SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
543 unsigned N) :
544 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
545};
546} // end anonymous namespace
547
548using MBBRegionsVector = SmallVector<SchedRegion, 16>;
549
550static void
551getSchedRegions(MachineBasicBlock *MBB,
552 MBBRegionsVector &Regions,
553 bool RegionsTopDown) {
554 MachineFunction *MF = MBB->getParent();
555 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
556
557 MachineBasicBlock::iterator I = nullptr;
558 for (MachineBasicBlock::iterator RegionEnd = MBB->end();
559 RegionEnd != MBB->begin(); RegionEnd = I) {
560
561 // Avoid decrementing RegionEnd for blocks with no terminator.
562 if (RegionEnd != MBB->end() ||
563 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
564 --RegionEnd;
565 }
566
567 // The next region starts above the previous region. Look backward in the
568 // instruction stream until we find the nearest boundary.
569 unsigned NumRegionInstrs = 0;
570 I = RegionEnd;
571 for (;I != MBB->begin(); --I) {
572 MachineInstr &MI = *std::prev(I);
573 if (isSchedBoundary(&MI, &*MBB, MF, TII))
574 break;
575 if (!MI.isDebugOrPseudoInstr()) {
576 // MBB::size() uses instr_iterator to count. Here we need a bundle to
577 // count as a single instruction.
578 ++NumRegionInstrs;
579 }
580 }
581
582 // It's possible we found a scheduling region that only has debug
583 // instructions. Don't bother scheduling these.
584 if (NumRegionInstrs != 0)
585 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
586 }
587
588 if (RegionsTopDown)
589 std::reverse(Regions.begin(), Regions.end());
590}
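// For example, a block containing "%a = ...; %b = ...; call @f; %c = ...;
// %d = ..." is split into two regions, [%a, %b] and [%c, %d]; the call is a
// scheduling boundary (see isSchedBoundary) and is never reordered itself.
// (Illustrative block; with RegionsTopDown the regions are visited in block
// order, otherwise bottom-up.)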
591
592/// Main driver for both MachineScheduler and PostMachineScheduler.
593void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
594 bool FixKillFlags) {
595 // Visit all machine basic blocks.
596 //
597 // TODO: Visit blocks in global postorder or postorder within the bottom-up
598 // loop tree. Then we can optionally compute global RegPressure.
599 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
600 MBB != MBBEnd; ++MBB) {
601
602 Scheduler.startBlock(&*MBB);
603
604#ifndef NDEBUG
605 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
606 continue;
607 if (SchedOnlyBlock.getNumOccurrences()
608 && (int)SchedOnlyBlock != MBB->getNumber())
609 continue;
610#endif
611
612 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
613 // points to the scheduling boundary at the bottom of the region. The DAG
614 // does not include RegionEnd, but the region does (i.e. the next
615 // RegionEnd is above the previous RegionBegin). If the current block has
616 // no terminator then RegionEnd == MBB->end() for the bottom region.
617 //
618 // All the regions of MBB are first found and stored in MBBRegions, which
619 // are then processed (MBB) top-down if RegionsTopDown is true.
620 //
621 // The Scheduler may insert instructions during either schedule() or
622 // exitRegion(), even for empty regions. So the local iterators 'I' and
623 // 'RegionEnd' are invalid across these calls. Instructions must not be
624 // added to other regions than the current one without updating MBBRegions.
625
626 MBBRegionsVector MBBRegions;
627 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
628 for (const SchedRegion &R : MBBRegions) {
629 MachineBasicBlock::iterator I = R.RegionBegin;
630 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
631 unsigned NumRegionInstrs = R.NumRegionInstrs;
632
633 // Notify the scheduler of the region, even if we may skip scheduling
634 // it. Perhaps it still needs to be bundled.
635 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
636
637 // Skip empty scheduling regions (0 or 1 schedulable instructions).
638 if (I == RegionEnd || I == std::prev(RegionEnd)) {
639 // Close the current region. Bundle the terminator if needed.
640 // This invalidates 'RegionEnd' and 'I'.
641 Scheduler.exitRegion();
642 continue;
643 }
644 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
645 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
646 << " " << MBB->getName() << "\n From: " << *I
647 << " To: ";
648 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
649 else dbgs() << "End\n";
650 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
651 if (DumpCriticalPathLength) {
652 errs() << MF->getName();
653 errs() << ":%bb. " << MBB->getNumber();
654 errs() << " " << MBB->getName() << " \n";
655 }
656
657 // Schedule a region: possibly reorder instructions.
658 // This invalidates the original region iterators.
659 Scheduler.schedule();
660
661 // Close the current region.
662 Scheduler.exitRegion();
663 }
664 Scheduler.finishBlock();
665 // FIXME: Ideally, no further passes should rely on kill flags. However,
666 // thumb2 size reduction is currently an exception, so the PostMIScheduler
667 // needs to do this.
668 if (FixKillFlags)
669 Scheduler.fixupKills(*MBB);
670 }
671 Scheduler.finalizeSchedule();
672}
673
674void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
675 // unimplemented
676}
677
678#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
680 dbgs() << "Queue " << Name << ": ";
681 for (const SUnit *SU : Queue)
682 dbgs() << SU->NodeNum << " ";
683 dbgs() << "\n";
684}
685#endif
686
687//===----------------------------------------------------------------------===//
688// ScheduleDAGMI - Basic machine instruction scheduling. This is
689// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
690// virtual registers.
691//===----------------------------------------------------------------------===//
692
693// Provide a vtable anchor.
694ScheduleDAGMI::~ScheduleDAGMI() = default;
695
696/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
697/// NumPredsLeft reaches zero, release the successor node.
698///
699/// FIXME: Adjust SuccSU height based on MinLatency.
700void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
701 SUnit *SuccSU = SuccEdge->getSUnit();
702
703 if (SuccEdge->isWeak()) {
704 --SuccSU->WeakPredsLeft;
705 if (SuccEdge->isCluster())
706 NextClusterSucc = SuccSU;
707 return;
708 }
709#ifndef NDEBUG
710 if (SuccSU->NumPredsLeft == 0) {
711 dbgs() << "*** Scheduling failed! ***\n";
712 dumpNode(*SuccSU);
713 dbgs() << " has been released too many times!\n";
714 llvm_unreachable(nullptr);
715 }
716#endif
717 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
718 // CurrCycle may have advanced since then.
719 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
720 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
721
722 --SuccSU->NumPredsLeft;
723 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
724 SchedImpl->releaseTopNode(SuccSU);
725}
726
727/// releaseSuccessors - Call releaseSucc on each of SU's successors.
728void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
729 for (SDep &Succ : SU->Succs)
730 releaseSucc(SU, &Succ);
731}
732
733/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
734/// NumSuccsLeft reaches zero, release the predecessor node.
735///
736/// FIXME: Adjust PredSU height based on MinLatency.
737void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
738 SUnit *PredSU = PredEdge->getSUnit();
739
740 if (PredEdge->isWeak()) {
741 --PredSU->WeakSuccsLeft;
742 if (PredEdge->isCluster())
743 NextClusterPred = PredSU;
744 return;
745 }
746#ifndef NDEBUG
747 if (PredSU->NumSuccsLeft == 0) {
748 dbgs() << "*** Scheduling failed! ***\n";
749 dumpNode(*PredSU);
750 dbgs() << " has been released too many times!\n";
751 llvm_unreachable(nullptr);
752 }
753#endif
754 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
755 // CurrCycle may have advanced since then.
756 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
757 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
758
759 --PredSU->NumSuccsLeft;
760 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
761 SchedImpl->releaseBottomNode(PredSU);
762}
763
764/// releasePredecessors - Call releasePred on each of SU's predecessors.
765void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
766 for (SDep &Pred : SU->Preds)
767 releasePred(SU, &Pred);
768}
769
770void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
771 ScheduleDAGInstrs::startBlock(bb);
772 SchedImpl->enterMBB(bb);
773}
774
775void ScheduleDAGMI::finishBlock() {
776 SchedImpl->leaveMBB();
777 ScheduleDAGInstrs::finishBlock();
778}
779
780/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
781/// after crossing a scheduling boundary. [begin, end) includes all instructions
782/// in the region, including the boundary itself and single-instruction regions
783/// that don't get scheduled.
784void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
785 MachineBasicBlock::iterator begin,
786 MachineBasicBlock::iterator end,
787 unsigned regioninstrs)
788{
789 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
790
791 SchedImpl->initPolicy(begin, end, regioninstrs);
792}
793
794/// This is normally called from the main scheduler loop but may also be invoked
795/// by the scheduling strategy to perform additional code motion.
796void ScheduleDAGMI::moveInstruction(
797 MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
798 // Advance RegionBegin if the first instruction moves down.
799 if (&*RegionBegin == MI)
800 ++RegionBegin;
801
802 // Update the instruction stream.
803 BB->splice(InsertPos, BB, MI);
804
805 // Update LiveIntervals
806 if (LIS)
807 LIS->handleMove(*MI, /*UpdateFlags=*/true);
808
809 // Recede RegionBegin if an instruction moves above the first.
810 if (RegionBegin == InsertPos)
811 RegionBegin = MI;
812}
813
814bool ScheduleDAGMI::checkSchedLimit() {
815#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
816 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
817 CurrentTop = CurrentBottom;
818 return false;
819 }
820 ++NumInstrsScheduled;
821#endif
822 return true;
823}
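// In +Asserts builds the cutoff enables bisecting misched decisions: e.g.
// "llc -misched-cutoff=20 ..." schedules only the first 20 instructions and
// leaves the remaining ones in their original order (illustrative
// invocation of the -misched-cutoff option declared above).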
824
825/// Per-region scheduling driver, called back from
826/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
827/// that does not consider liveness or register pressure. It is useful for
828/// PostRA scheduling and potentially other custom schedulers.
830 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
831 LLVM_DEBUG(SchedImpl->dumpPolicy());
832
833 // Build the DAG.
834 buildSchedGraph(AA);
835
836 postProcessDAG();
837
838 SmallVector<SUnit*, 8> TopRoots, BotRoots;
839 findRootsAndBiasEdges(TopRoots, BotRoots);
840
841 LLVM_DEBUG(dump());
842 if (PrintDAGs) dump();
843 if (ViewMISchedDAGs) viewGraph();
844
845 // Initialize the strategy before modifying the DAG.
846 // This may initialize a DFSResult to be used for queue priority.
847 SchedImpl->initialize(this);
848
849 // Initialize ready queues now that the DAG and priority data are finalized.
850 initQueues(TopRoots, BotRoots);
851
852 bool IsTopNode = false;
853 while (true) {
854 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
855 SUnit *SU = SchedImpl->pickNode(IsTopNode);
856 if (!SU) break;
857
858 assert(!SU->isScheduled && "Node already scheduled");
859 if (!checkSchedLimit())
860 break;
861
862 MachineInstr *MI = SU->getInstr();
863 if (IsTopNode) {
864 assert(SU->isTopReady() && "node still has unscheduled dependencies");
865 if (&*CurrentTop == MI)
866 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
867 else
868 moveInstruction(MI, CurrentTop);
869 } else {
870 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
871 MachineBasicBlock::iterator priorII =
872 priorNonDebug(CurrentBottom, CurrentTop);
873 if (&*priorII == MI)
874 CurrentBottom = priorII;
875 else {
876 if (&*CurrentTop == MI)
877 CurrentTop = nextIfDebug(++CurrentTop, priorII);
878 moveInstruction(MI, CurrentBottom);
879 CurrentBottom = MI;
880 }
881 }
882 // Notify the scheduling strategy before updating the DAG.
883 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
884 // runs, it can then use the accurate ReadyCycle time to determine whether
885 // newly released nodes can move to the readyQ.
886 SchedImpl->schedNode(SU, IsTopNode);
887
888 updateQueues(SU, IsTopNode);
889 }
890 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
891
892 placeDebugValues();
893
894 LLVM_DEBUG({
895 dbgs() << "*** Final schedule for "
896 << printMBBReference(*begin()->getParent()) << " ***\n";
897 dumpSchedule();
898 dbgs() << '\n';
899 });
900}
901
902/// Apply each ScheduleDAGMutation step in order.
903void ScheduleDAGMI::postProcessDAG() {
904 for (auto &m : Mutations)
905 m->apply(this);
906}
907
909void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
910 SmallVectorImpl<SUnit*> &BotRoots) {
911 for (SUnit &SU : SUnits) {
912 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
913
914 // Order predecessors so DFSResult follows the critical path.
915 SU.biasCriticalPath();
916
917 // A SUnit is ready to top schedule if it has no predecessors.
918 if (!SU.NumPredsLeft)
919 TopRoots.push_back(&SU);
920 // A SUnit is ready to bottom schedule if it has no successors.
921 if (!SU.NumSuccsLeft)
922 BotRoots.push_back(&SU);
923 }
924 ExitSU.biasCriticalPath();
925}
926
927/// Identify DAG roots and setup scheduler queues.
928void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
929 ArrayRef<SUnit*> BotRoots) {
930 NextClusterSucc = nullptr;
931 NextClusterPred = nullptr;
932
933 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
934 //
935 // Nodes with unreleased weak edges can still be roots.
936 // Release top roots in forward order.
937 for (SUnit *SU : TopRoots)
938 SchedImpl->releaseTopNode(SU);
939
940 // Release bottom roots in reverse order so the higher priority nodes appear
941 // first. This is more natural and slightly more efficient.
942 for (SmallVectorImpl<SUnit*>::const_reverse_iterator
943 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
944 SchedImpl->releaseBottomNode(*I);
945 }
946
947 releaseSuccessors(&EntrySU);
948 releasePredecessors(&ExitSU);
949
950 SchedImpl->registerRoots();
951
952 // Advance past initial DebugValues.
953 CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
954 CurrentBottom = RegionEnd;
955}
956
957/// Update scheduler queues after scheduling an instruction.
958void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
959 // Release dependent instructions for scheduling.
960 if (IsTopNode)
961 releaseSuccessors(SU);
962 else
963 releasePredecessors(SU);
964
965 SU->isScheduled = true;
966}
967
968/// Reinsert any remaining debug_values, just like the PostRA scheduler.
970 // If first instruction was a DBG_VALUE then put it back.
971 if (FirstDbgValue) {
972 BB->splice(RegionBegin, BB, FirstDbgValue);
973 RegionBegin = FirstDbgValue;
974 }
975
976 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
977 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
978 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
979 MachineInstr *DbgValue = P.first;
980 MachineBasicBlock::iterator OrigPrevMI = P.second;
981 if (&*RegionBegin == DbgValue)
982 ++RegionBegin;
983 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
984 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
985 RegionEnd = DbgValue;
986 }
987}
988
989#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
990static const char *scheduleTableLegend = " i: issue\n x: resource booked";
991
992LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
993 // Bail out when there is no schedule model to query.
994 if (!SchedModel.hasInstrSchedModel())
995 return;
996
997 // Nothing to show if there is no or just one instruction.
998 if (BB->size() < 2)
999 return;
1000
1001 dbgs() << " * Schedule table (TopDown):\n";
1002 dbgs() << scheduleTableLegend << "\n";
1003 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1004 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1005 for (MachineInstr &MI : *this) {
1006 SUnit *SU = getSUnit(&MI);
1007 if (!SU)
1008 continue;
1009 const MCSchedClassDesc *SC = getSchedClass(SU);
1010 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1011 PE = SchedModel.getWriteProcResEnd(SC);
1012 PI != PE; ++PI) {
1013 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1014 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1015 }
1016 }
1017 // Print the header with the cycles
1018 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1019 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1020 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1021 dbgs() << "|\n";
1022
1023 for (MachineInstr &MI : *this) {
1024 SUnit *SU = getSUnit(&MI);
1025 if (!SU) {
1026 dbgs() << "Missing SUnit\n";
1027 continue;
1028 }
1029 std::string NodeName("SU(");
1030 NodeName += std::to_string(SU->NodeNum) + ")";
1031 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1032 unsigned C = FirstCycle;
1033 for (; C <= LastCycle; ++C) {
1034 if (C == SU->TopReadyCycle)
1035 dbgs() << llvm::left_justify("| i", ColWidth);
1036 else
1037 dbgs() << llvm::left_justify("|", ColWidth);
1038 }
1039 dbgs() << "|\n";
1040 const MCSchedClassDesc *SC = getSchedClass(SU);
1041
1042 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1043 SchedModel.getWriteProcResBegin(SC),
1044 SchedModel.getWriteProcResEnd(SC));
1045
1046 if (MISchedSortResourcesInTrace)
1047 llvm::stable_sort(ResourcesIt,
1048 [](const MCWriteProcResEntry &LHS,
1049 const MCWriteProcResEntry &RHS) -> bool {
1050 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1051 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1052 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1053 });
1054 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1055 C = FirstCycle;
1056 const std::string ResName =
1057 SchedModel.getResourceName(PI.ProcResourceIdx);
1058 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1059 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1060 dbgs() << llvm::left_justify("|", ColWidth);
1061 }
1062 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1063 ++I, ++C)
1064 dbgs() << llvm::left_justify("| x", ColWidth);
1065 while (C++ <= LastCycle)
1066 dbgs() << llvm::left_justify("|", ColWidth);
1067 // Place end char
1068 dbgs() << "| \n";
1069 }
1070 }
1071}
1072
1073LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
1074 // Bail out when there is no schedule model to query.
1075 if (!SchedModel.hasInstrSchedModel())
1076 return;
1077
1078 // Nothing to show if there is no or just one instruction.
1079 if (BB->size() < 2)
1080 return;
1081
1082 dbgs() << " * Schedule table (BottomUp):\n";
1083 dbgs() << scheduleTableLegend << "\n";
1084
1085 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1086 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1087 for (MachineInstr &MI : *this) {
1088 SUnit *SU = getSUnit(&MI);
1089 if (!SU)
1090 continue;
1091 const MCSchedClassDesc *SC = getSchedClass(SU);
1092 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1093 PE = SchedModel.getWriteProcResEnd(SC);
1094 PI != PE; ++PI) {
1095 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1096 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1097 }
1098 }
1099 // Print the header with the cycles
1100 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1101 for (int C = FirstCycle; C >= LastCycle; --C)
1102 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1103 dbgs() << "|\n";
1104
1105 for (MachineInstr &MI : *this) {
1106 SUnit *SU = getSUnit(&MI);
1107 if (!SU) {
1108 dbgs() << "Missing SUnit\n";
1109 continue;
1110 }
1111 std::string NodeName("SU(");
1112 NodeName += std::to_string(SU->NodeNum) + ")";
1113 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1114 int C = FirstCycle;
1115 for (; C >= LastCycle; --C) {
1116 if (C == (int)SU->BotReadyCycle)
1117 dbgs() << llvm::left_justify("| i", ColWidth);
1118 else
1119 dbgs() << llvm::left_justify("|", ColWidth);
1120 }
1121 dbgs() << "|\n";
1122 const MCSchedClassDesc *SC = getSchedClass(SU);
1123 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1124 SchedModel.getWriteProcResBegin(SC),
1125 SchedModel.getWriteProcResEnd(SC));
1126
1127 if (MISchedSortResourcesInTrace)
1128 llvm::stable_sort(ResourcesIt,
1129 [](const MCWriteProcResEntry &LHS,
1130 const MCWriteProcResEntry &RHS) -> bool {
1131 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1132 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1133 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1134 });
1135 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1136 C = FirstCycle;
1137 const std::string ResName =
1138 SchedModel.getResourceName(PI.ProcResourceIdx);
1139 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1140 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1141 dbgs() << llvm::left_justify("|", ColWidth);
1142 }
1143 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1144 ++I, --C)
1145 dbgs() << llvm::left_justify("| x", ColWidth);
1146 while (C-- >= LastCycle)
1147 dbgs() << llvm::left_justify("|", ColWidth);
1148 // Place end char
1149 dbgs() << "| \n";
1150 }
1151 }
1152}
1153#endif
1154
1155#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1156LLVM_DUMP_METHOD void ScheduleDAGMI::dump() const {
1157 if (MISchedDumpScheduleTrace) {
1158 if (DumpDir == DumpDirection::TopDown)
1159 dumpScheduleTraceTopDown();
1160 else if (DumpDir == DumpDirection::BottomUp)
1161 dumpScheduleTraceBottomUp();
1162 else if (DumpDir == DumpDirection::Bidirectional) {
1163 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1164 } else {
1165 dbgs() << "* Schedule table: DumpDirection not set.\n";
1166 }
1167 }
1168
1169 for (MachineInstr &MI : *this) {
1170 if (SUnit *SU = getSUnit(&MI))
1171 dumpNode(*SU);
1172 else
1173 dbgs() << "Missing SUnit\n";
1174 }
1175}
1176#endif
1177
1178//===----------------------------------------------------------------------===//
1179// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1180// preservation.
1181//===----------------------------------------------------------------------===//
1182
1183ScheduleDAGMILive::~ScheduleDAGMILive() {
1184 delete DFSResult;
1185}
1186
1187void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1188 const MachineInstr &MI = *SU.getInstr();
1189 for (const MachineOperand &MO : MI.operands()) {
1190 if (!MO.isReg())
1191 continue;
1192 if (!MO.readsReg())
1193 continue;
1194 if (TrackLaneMasks && !MO.isUse())
1195 continue;
1196
1197 Register Reg = MO.getReg();
1198 if (!Reg.isVirtual())
1199 continue;
1200
1201 // Ignore re-defs.
1202 if (TrackLaneMasks) {
1203 bool FoundDef = false;
1204 for (const MachineOperand &MO2 : MI.all_defs()) {
1205 if (MO2.getReg() == Reg && !MO2.isDead()) {
1206 FoundDef = true;
1207 break;
1208 }
1209 }
1210 if (FoundDef)
1211 continue;
1212 }
1213
1214 // Record this local VReg use.
1215 VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
1216 for (; UI != VRegUses.end(); ++UI) {
1217 if (UI->SU == &SU)
1218 break;
1219 }
1220 if (UI == VRegUses.end())
1222 }
1223}
1224
1225/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1226/// crossing a scheduling boundary. [begin, end) includes all instructions in
1227/// the region, including the boundary itself and single-instruction regions
1228/// that don't get scheduled.
1229void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1230 MachineBasicBlock::iterator begin,
1231 MachineBasicBlock::iterator end,
1232 unsigned regioninstrs)
1233{
1234 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1235 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1236
1237 // For convenience remember the end of the liveness region.
1238 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1239
1240 SUPressureDiffs.clear();
1241
1242 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1243 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1244
1246 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1247}
1248
1249// Setup the register pressure trackers for the top scheduled and bottom
1250// scheduled regions.
1251void ScheduleDAGMILive::initRegPressure() {
1252 VRegUses.clear();
1253 VRegUses.setUniverse(MRI.getNumVirtRegs());
1254 for (SUnit &SU : SUnits)
1255 collectVRegUses(SU);
1256
1258 ShouldTrackLaneMasks, false);
1260 ShouldTrackLaneMasks, false);
1261
1262 // Close the RPTracker to finalize live ins.
1263 RPTracker.closeRegion();
1264
1265 LLVM_DEBUG(RPTracker.dump());
1266
1267 // Initialize the live ins and live outs.
1268 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1269 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1270
1271 // Close one end of the tracker so we can call
1272 // getMaxUpward/DownwardPressureDelta before advancing across any
1273 // instructions. This converts currently live regs into live ins/outs.
1274 TopRPTracker.closeTop();
1275 BotRPTracker.closeBottom();
1276
1277 BotRPTracker.initLiveThru(RPTracker);
1278 if (!BotRPTracker.getLiveThru().empty()) {
1280 LLVM_DEBUG(dbgs() << "Live Thru: ";
1282 };
1283
1284 // For each live out vreg reduce the pressure change associated with other
1285 // uses of the same vreg below the live-out reaching def.
1286 updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1287
1288 // Account for liveness generated by the region boundary.
1289 if (LiveRegionEnd != RegionEnd) {
1290 SmallVector<RegisterMaskPair, 8> LiveUses;
1291 BotRPTracker.recede(&LiveUses);
1292 updatePressureDiffs(LiveUses);
1293 }
1294
1295 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1296 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1297 dbgs() << "Bottom Pressure:\n";
1298 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1299
1300 assert((BotRPTracker.getPos() == RegionEnd ||
1301 (RegionEnd->isDebugInstr() &&
1302 BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1303 "Can't find the region bottom");
1304
1305 // Cache the list of excess pressure sets in this region. This will also track
1306 // the max pressure in the scheduled code for these sets.
1307 RegionCriticalPSets.clear();
1308 const std::vector<unsigned> &RegionPressure =
1309 RPTracker.getPressure().MaxSetPressure;
1310 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1311 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1312 if (RegionPressure[i] > Limit) {
1313 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1314 << " Actual " << RegionPressure[i] << "\n");
1315 RegionCriticalPSets.push_back(PressureChange(i));
1316 }
1317 }
1318 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1319 for (const PressureChange &RCPS
1320 : RegionCriticalPSets) dbgs()
1321 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1322 dbgs() << "\n");
1323}
1324
1326void ScheduleDAGMILive::updateScheduledPressure(const SUnit *SU,
1327 const std::vector<unsigned> &NewMaxPressure) {
1328 const PressureDiff &PDiff = getPressureDiff(SU);
1329 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1330 for (const PressureChange &PC : PDiff) {
1331 if (!PC.isValid())
1332 break;
1333 unsigned ID = PC.getPSet();
1334 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1335 ++CritIdx;
1336 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1337 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1338 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1339 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1340 }
1341 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1342 if (NewMaxPressure[ID] >= Limit - 2) {
1343 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1344 << NewMaxPressure[ID]
1345 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1346 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1347 << " livethru)\n");
1348 }
1349 }
1350}
1351
1352/// Update the PressureDiff array for liveness after scheduling this
1353/// instruction.
1354void ScheduleDAGMILive::updatePressureDiffs(
1355 ArrayRef<RegisterMaskPair> LiveUses) {
1356 for (const RegisterMaskPair &P : LiveUses) {
1357 Register Reg = P.RegUnit;
1358 /// FIXME: Currently assuming single-use physregs.
1359 if (!Reg.isVirtual())
1360 continue;
1361
1362 if (ShouldTrackLaneMasks) {
1363 // If the register has just become live then other uses won't change
1364 // this fact anymore => decrement pressure.
1365 // If the register has just become dead then other uses make it come
1366 // back to life => increment pressure.
1367 bool Decrement = P.LaneMask.any();
1368
1369 for (const VReg2SUnit &V2SU
1370 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1371 SUnit &SU = *V2SU.SU;
1372 if (SU.isScheduled || &SU == &ExitSU)
1373 continue;
1374
1375 PressureDiff &PDiff = getPressureDiff(&SU);
1376 PDiff.addPressureChange(Reg, Decrement, &MRI);
1377 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1378 << printReg(Reg, TRI) << ':'
1379 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1380 dbgs() << " to "; PDiff.dump(*TRI););
1381 }
1382 } else {
1383 assert(P.LaneMask.any());
1384 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1385 // This may be called before CurrentBottom has been initialized. However,
1386 // BotRPTracker must have a valid position. We want the value live into the
1387 // instruction or live out of the block, so ask for the previous
1388 // instruction's live-out.
1389 const LiveInterval &LI = LIS->getInterval(Reg);
1390 VNInfo *VNI;
1391 MachineBasicBlock::const_iterator I =
1392 nextIfDebug(BotRPTracker.getPos(), BB->end());
1393 if (I == BB->end())
1394 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1395 else {
1396 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1397 VNI = LRQ.valueIn();
1398 }
1399 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1400 assert(VNI && "No live value at use.");
1401 for (const VReg2SUnit &V2SU
1402 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1403 SUnit *SU = V2SU.SU;
1404 // If this use comes before the reaching def, it cannot be a last use,
1405 // so decrease its pressure change.
1406 if (!SU->isScheduled && SU != &ExitSU) {
1407 LiveQueryResult LRQ =
1408 LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1409 if (LRQ.valueIn() == VNI) {
1410 PressureDiff &PDiff = getPressureDiff(SU);
1411 PDiff.addPressureChange(Reg, true, &MRI);
1412 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1413 << *SU->getInstr();
1414 dbgs() << " to "; PDiff.dump(*TRI););
1415 }
1416 }
1417 }
1418 }
1419 }
1420}
1421
1422void ScheduleDAGMILive::dump() const {
1423#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1424 if (EntrySU.getInstr() != nullptr)
1425 dumpNodeAll(EntrySU);
1426 for (const SUnit &SU : SUnits) {
1427 dumpNodeAll(SU);
1428 if (ShouldTrackPressure) {
1429 dbgs() << " Pressure Diff : ";
1430 getPressureDiff(&SU).dump(*TRI);
1431 }
1432 dbgs() << " Single Issue : ";
1433 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1434 SchedModel.mustEndGroup(SU.getInstr()))
1435 dbgs() << "true;";
1436 else
1437 dbgs() << "false;";
1438 dbgs() << '\n';
1439 }
1440 if (ExitSU.getInstr() != nullptr)
1441 dumpNodeAll(ExitSU);
1442#endif
1443}
1444
1445/// schedule - Called back from MachineScheduler::runOnMachineFunction
1446/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1447/// only includes instructions that have DAG nodes, not scheduling boundaries.
1448///
1449/// This is a skeletal driver, with all the functionality pushed into helpers,
1450/// so that it can be easily extended by experimental schedulers. Generally,
1451/// implementing MachineSchedStrategy should be sufficient to implement a new
1452/// scheduling algorithm. However, if a scheduler further subclasses
1453/// ScheduleDAGMILive then it will want to override this virtual method in order
1454/// to update any specialized state.
1456 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1457 LLVM_DEBUG(SchedImpl->dumpPolicy());
1459
1461
1462 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1463 findRootsAndBiasEdges(TopRoots, BotRoots);
1464
1465 // Initialize the strategy before modifying the DAG.
1466 // This may initialize a DFSResult to be used for queue priority.
1467 SchedImpl->initialize(this);
1468
1469 LLVM_DEBUG(dump());
1470 if (PrintDAGs) dump();
1471 if (ViewMISchedDAGs) viewGraph();
1472
1473 // Initialize ready queues now that the DAG and priority data are finalized.
1474 initQueues(TopRoots, BotRoots);
1475
1476 bool IsTopNode = false;
1477 while (true) {
1478 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1479 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1480 if (!SU) break;
1481
1482 assert(!SU->isScheduled && "Node already scheduled");
1483 if (!checkSchedLimit())
1484 break;
1485
1486 scheduleMI(SU, IsTopNode);
1487
1488 if (DFSResult) {
1489 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1490 if (!ScheduledTrees.test(SubtreeID)) {
1491 ScheduledTrees.set(SubtreeID);
1492 DFSResult->scheduleTree(SubtreeID);
1493 SchedImpl->scheduleTree(SubtreeID);
1494 }
1495 }
1496
1497 // Notify the scheduling strategy after updating the DAG.
1498 SchedImpl->schedNode(SU, IsTopNode);
1499
1500 updateQueues(SU, IsTopNode);
1501 }
1502 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1503
1504 placeDebugValues();
1505
1506 LLVM_DEBUG({
1507 dbgs() << "*** Final schedule for "
1508 << printMBBReference(*begin()->getParent()) << " ***\n";
1509 dumpSchedule();
1510 dbgs() << '\n';
1511 });
1512}
1513
1514/// Build the DAG and setup three register pressure trackers.
1515void ScheduleDAGMILive::buildDAGWithRegPressure() {
1516 if (!ShouldTrackPressure) {
1517 RPTracker.reset();
1518 RegionCriticalPSets.clear();
1519 buildSchedGraph(AA);
1520 return;
1521 }
1522
1523 // Initialize the register pressure tracker used by buildSchedGraph.
1524 RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1525 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1526
1527 // Account for liveness generated by the region boundary.
1528 if (LiveRegionEnd != RegionEnd)
1529 RPTracker.recede();
1530
1531 // Build the DAG, and compute current register pressure.
1532 buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1533
1534 // Initialize top/bottom trackers after computing region pressure.
1535 initRegPressure();
1536}
1537
1538void ScheduleDAGMILive::computeDFSResult() {
1539 if (!DFSResult)
1540 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1541 DFSResult->clear();
1542 ScheduledTrees.clear();
1543 DFSResult->resize(SUnits.size());
1544 DFSResult->compute(SUnits);
1545 ScheduledTrees.resize(DFSResult->getNumSubtrees());
1546}
1547
1548/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1549/// only provides the critical path for single block loops. To handle loops that
1550/// span blocks, we could use the vreg path latencies provided by
1551/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1552/// available for use in the scheduler.
1553///
1554/// The cyclic path estimation identifies a def-use pair that crosses the back
1555/// edge and considers the depth and height of the nodes. For example, consider
1556/// the following instruction sequence where each instruction has unit latency
1557/// and defines an eponymous virtual register:
1558///
1559/// a->b(a,c)->c(b)->d(c)->exit
1560///
1561/// The cyclic critical path is two cycles: b->c->b
1562/// The acyclic critical path is four cycles: a->b->c->d->exit
1563/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1564/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1565/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1566/// LiveInDepth = depth(b) = len(a->b) = 1
1567///
1568/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1569/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1570/// CyclicCriticalPath = min(2, 2) = 2
1571///
1572/// This could be relevant to PostRA scheduling, but is currently implemented
1573/// assuming LiveIntervals.
1574unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1575 // This only applies to single-block loops.
1576 if (!BB->isSuccessor(BB))
1577 return 0;
1578
1579 unsigned MaxCyclicLatency = 0;
1580 // Visit each live out vreg def to find def/use pairs that cross iterations.
1581 for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1582 Register Reg = P.RegUnit;
1583 if (!Reg.isVirtual())
1584 continue;
1585 const LiveInterval &LI = LIS->getInterval(Reg);
1586 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1587 if (!DefVNI)
1588 continue;
1589
1590 MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1591 const SUnit *DefSU = getSUnit(DefMI);
1592 if (!DefSU)
1593 continue;
1594
1595 unsigned LiveOutHeight = DefSU->getHeight();
1596 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1597 // Visit all local users of the vreg def.
1598 for (const VReg2SUnit &V2SU
1599 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1600 SUnit *SU = V2SU.SU;
1601 if (SU == &ExitSU)
1602 continue;
1603
1604 // Only consider uses of the phi.
1605 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1606 if (!LRQ.valueIn()->isPHIDef())
1607 continue;
1608
1609 // Assume that a path spanning two iterations is a cycle, which could
1610 // overestimate in strange cases. This allows cyclic latency to be
1611 // estimated as the minimum slack of the vreg's depth or height.
1612 unsigned CyclicLatency = 0;
1613 if (LiveOutDepth > SU->getDepth())
1614 CyclicLatency = LiveOutDepth - SU->getDepth();
1615
1616 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1617 if (LiveInHeight > LiveOutHeight) {
1618 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1619 CyclicLatency = LiveInHeight - LiveOutHeight;
1620 } else
1621 CyclicLatency = 0;
1622
1623 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1624 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1625 if (CyclicLatency > MaxCyclicLatency)
1626 MaxCyclicLatency = CyclicLatency;
1627 }
1628 }
1629 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1630 return MaxCyclicLatency;
1631}
1632
1633/// Release ExitSU predecessors and setup scheduler queues. Re-position
1634/// the Top RP tracker in case the region beginning has changed.
1635void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1636 ArrayRef<SUnit*> BotRoots) {
1637 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1638 if (ShouldTrackPressure) {
1639 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1640 TopRPTracker.setPos(CurrentTop);
1641 }
1642}
1643
1644/// Move an instruction and update register pressure.
1645void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1646 // Move the instruction to its new location in the instruction stream.
1647 MachineInstr *MI = SU->getInstr();
1648
1649 if (IsTopNode) {
1650 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1651 if (&*CurrentTop == MI)
1652 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1653 else {
1654 moveInstruction(MI, CurrentTop);
1655 TopRPTracker.setPos(MI);
1656 }
1657
1658 if (ShouldTrackPressure) {
1659 // Update top scheduled pressure.
1660 RegisterOperands RegOpers;
1661 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1662 if (ShouldTrackLaneMasks) {
1663 // Adjust liveness and add missing dead+read-undef flags.
1664 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1665 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1666 } else {
1667 // Adjust for missing dead-def flags.
1668 RegOpers.detectDeadDefs(*MI, *LIS);
1669 }
1670
1671 TopRPTracker.advance(RegOpers);
1672 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1673 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1674 TopRPTracker.getRegSetPressureAtPos(), TRI););
1675
1676 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1677 }
1678 } else {
1679 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1680 MachineBasicBlock::iterator priorII =
1681 priorNonDebug(CurrentBottom, CurrentTop);
1682 if (&*priorII == MI)
1683 CurrentBottom = priorII;
1684 else {
1685 if (&*CurrentTop == MI) {
1686 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1687 TopRPTracker.setPos(CurrentTop);
1688 }
1689 moveInstruction(MI, CurrentBottom);
1690 CurrentBottom = MI;
1691 BotRPTracker.setPos(CurrentBottom);
1692 }
1693 if (ShouldTrackPressure) {
1694 RegisterOperands RegOpers;
1695 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1696 if (ShouldTrackLaneMasks) {
1697 // Adjust liveness and add missing dead+read-undef flags.
1698 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1699 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1700 } else {
1701 // Adjust for missing dead-def flags.
1702 RegOpers.detectDeadDefs(*MI, *LIS);
1703 }
1704
1705 if (BotRPTracker.getPos() != CurrentBottom)
1706 BotRPTracker.recedeSkipDebugValues();
1707 SmallVector<RegisterMaskPair, 8> LiveUses;
1708 BotRPTracker.recede(RegOpers, &LiveUses);
1709 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1710 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1711 BotRPTracker.getRegSetPressureAtPos(), TRI););
1712
1713 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1714 updatePressureDiffs(LiveUses);
1715 }
1716 }
1717}
1718
1719//===----------------------------------------------------------------------===//
1720// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1721//===----------------------------------------------------------------------===//
1722
1723namespace {
1724
1725/// Post-process the DAG to create cluster edges between neighboring
1726/// loads or between neighboring stores.
1727class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1728 struct MemOpInfo {
1729 SUnit *SU;
1730 SmallVector<const MachineOperand *, 4> BaseOps;
1731 int64_t Offset;
1732 LocationSize Width;
1733 bool OffsetIsScalable;
1734
1735 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1736 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1737 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1738 Width(Width), OffsetIsScalable(OffsetIsScalable) {}
1739
1740 static bool Compare(const MachineOperand *const &A,
1741 const MachineOperand *const &B) {
1742 if (A->getType() != B->getType())
1743 return A->getType() < B->getType();
1744 if (A->isReg())
1745 return A->getReg() < B->getReg();
1746 if (A->isFI()) {
1747 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1748 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1749 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1750 TargetFrameLowering::StackGrowsDown;
1751 return StackGrowsDown ? A->getIndex() > B->getIndex()
1752 : A->getIndex() < B->getIndex();
1753 }
1754
1755 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1756 "index bases.");
1757 }
1758
1759 bool operator<(const MemOpInfo &RHS) const {
1760 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1761 // comparison instead when it's available.
1762 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1763 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1764 Compare))
1765 return true;
1766 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1767 BaseOps.begin(), BaseOps.end(), Compare))
1768 return false;
1769 if (Offset != RHS.Offset)
1770 return Offset < RHS.Offset;
1771 return SU->NodeNum < RHS.SU->NodeNum;
1772 }
1773 };
1774
1775 const TargetInstrInfo *TII;
1776 const TargetRegisterInfo *TRI;
1777 bool IsLoad;
1778 bool ReorderWhileClustering;
1779
1780public:
1781 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1782 const TargetRegisterInfo *tri, bool IsLoad,
1783 bool ReorderWhileClustering)
1784 : TII(tii), TRI(tri), IsLoad(IsLoad),
1785 ReorderWhileClustering(ReorderWhileClustering) {}
1786
1787 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1788
1789protected:
1790 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1791 ScheduleDAGInstrs *DAG);
1792 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1793 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1794 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1795 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1796};
1797
1798class StoreClusterMutation : public BaseMemOpClusterMutation {
1799public:
1800 StoreClusterMutation(const TargetInstrInfo *tii,
1801 const TargetRegisterInfo *tri,
1802 bool ReorderWhileClustering)
1803 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1804};
1805
1806class LoadClusterMutation : public BaseMemOpClusterMutation {
1807public:
1808 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1809 bool ReorderWhileClustering)
1810 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1811};
1812
1813} // end anonymous namespace
1814
1815namespace llvm {
1816
1817std::unique_ptr<ScheduleDAGMutation>
1818createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1819 const TargetRegisterInfo *TRI,
1820 bool ReorderWhileClustering) {
1821 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1822 TII, TRI, ReorderWhileClustering)
1823 : nullptr;
1824}
1825
1826std::unique_ptr<ScheduleDAGMutation>
1827createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1828 const TargetRegisterInfo *TRI,
1829 bool ReorderWhileClustering) {
1830 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1831 TII, TRI, ReorderWhileClustering)
1832 : nullptr;
1833}
1834
1835} // end namespace llvm
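// Targets typically attach these mutations when constructing their
// scheduler, e.g. (illustrative sketch modeled on in-tree targets; DAG->TII
// and DAG->TRI are the public ScheduleDAG members):
//
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));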
1836
1837// Sort all the loads/stores first; then, for each load/store, check the
1838// following ones one by one until reaching the first non-dependent one, and
1839// call the target hook to see if they can be clustered.
1840// If FastCluster is enabled, we assume that all the loads/stores have been
1841// preprocessed and no longer have dependencies on each other.
1842void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1843 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1844 ScheduleDAGInstrs *DAG) {
1845 // Keep track of the current cluster length and bytes for each SUnit.
1846 DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1847
1848 // At this point, `MemOpRecords` must hold at least two mem ops. Try to
1849 // cluster the mem ops collected within the `MemOpRecords` array.
1850 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1851 // Decision to cluster mem ops is taken based on target dependent logic
1852 auto MemOpa = MemOpRecords[Idx];
1853
1854 // Seek for the next load/store to do the cluster.
1855 unsigned NextIdx = Idx + 1;
1856 for (; NextIdx < End; ++NextIdx)
1857 // Skip if MemOpb has been clustered already or has dependency with
1858 // MemOpa.
1859 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1860 (FastCluster ||
1861 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1862 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1863 break;
1864 if (NextIdx == End)
1865 continue;
1866
1867 auto MemOpb = MemOpRecords[NextIdx];
1868 unsigned ClusterLength = 2;
1869 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1870 MemOpb.Width.getValue().getKnownMinValue();
1871 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1872 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1873 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1874 MemOpb.Width.getValue().getKnownMinValue();
1875 }
1876
1877 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1878 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1879 MemOpb.Offset, MemOpb.OffsetIsScalable,
1880 ClusterLength, CurrentClusterBytes))
1881 continue;
1882
1883 SUnit *SUa = MemOpa.SU;
1884 SUnit *SUb = MemOpb.SU;
1885 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1886 std::swap(SUa, SUb);
1887
1888 // FIXME: Is this check really required?
1889 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1890 continue;
1891
1892 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1893 << SUb->NodeNum << ")\n");
1894 ++NumClustered;
1895
1896 if (IsLoad) {
1897 // Copy successor edges from SUa to SUb. Interleaving computation
1898 // dependent on SUa can prevent load combining due to register reuse.
1899 // Predecessor edges do not need to be copied from SUb to SUa since
1900 // nearby loads should have effectively the same inputs.
1901 for (const SDep &Succ : SUa->Succs) {
1902 if (Succ.getSUnit() == SUb)
1903 continue;
1904 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1905 << ")\n");
1906 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1907 }
1908 } else {
1909 // Copy predecessor edges from SUb to SUa to avoid the SUnits that SUb
1910 // depends on being scheduled in between SUb and SUa. Successor edges
1911 // do not need to be copied from SUa to SUb since no one will depend
1912 // on stores.
1913 // Note that we don't need to worry about memory dependencies here:
1914 // we never try to cluster mem ops that have one.
1915 for (const SDep &Pred : SUb->Preds) {
1916 if (Pred.getSUnit() == SUa)
1917 continue;
1918 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1919 << ")\n");
1920 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1921 }
1922 }
1923
1924 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1925 CurrentClusterBytes};
1926
1927 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1928 << ", Curr cluster bytes: " << CurrentClusterBytes
1929 << "\n");
1930 }
1931}
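// [Worked example, editorial sketch; not part of the LLVM source.] Suppose
// MemOpRecords holds three 8-byte loads A, B, C at adjacent offsets.
//   A-B: ClusterLength = 2, CurrentClusterBytes = 16, cluster edge A->B.
//   B-C: B is already in SUnit2ClusterInfo, so the candidate cluster grows
//        to ClusterLength = 3 and 24 bytes, and shouldClusterMemOps()
//        decides whether a cluster of that size is still profitable.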
1932
1933void BaseMemOpClusterMutation::collectMemOpRecords(
1934 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1935 for (auto &SU : SUnits) {
1936 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1937 (!IsLoad && !SU.getInstr()->mayStore()))
1938 continue;
1939
1940 const MachineInstr &MI = *SU.getInstr();
1941 SmallVector<const MachineOperand *, 4> BaseOps;
1942 int64_t Offset;
1943 bool OffsetIsScalable;
1944 LocationSize Width = 0;
1945 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1946 OffsetIsScalable, Width, TRI)) {
1947 MemOpRecords.push_back(
1948 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1949
1950 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1951 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1952 << ", Width: " << Width << "\n");
1953 }
1954#ifndef NDEBUG
1955 for (const auto *Op : BaseOps)
1956 assert(Op);
1957#endif
1958 }
1959}
1960
1961 bool BaseMemOpClusterMutation::groupMemOps(
1962 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1963 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1964 bool FastCluster =
1965 ForceFastCluster ||
1966 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1967
1968 for (const auto &MemOp : MemOps) {
1969 unsigned ChainPredID = DAG->SUnits.size();
1970 if (FastCluster) {
1971 for (const SDep &Pred : MemOp.SU->Preds) {
1972 // We only want to cluster mem ops that have the same ctrl (non-data)
1973 // pred, so that they have no ctrl dependency on each other. For store
1974 // instrs, we can still cluster them if the pred is a load instr.
1975 if ((Pred.isCtrl() &&
1976 (IsLoad ||
1977 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1978 !Pred.isArtificial()) {
1979 ChainPredID = Pred.getSUnit()->NodeNum;
1980 break;
1981 }
1982 }
1983 } else
1984 ChainPredID = 0;
1985
1986 Groups[ChainPredID].push_back(MemOp);
1987 }
1988 return FastCluster;
1989}
1990
1991/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1992void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1993 // Collect all the clusterable loads/stores
1994 SmallVector<MemOpInfo, 32> MemOpRecords;
1995 collectMemOpRecords(DAG->SUnits, MemOpRecords);
1996
1997 if (MemOpRecords.size() < 2)
1998 return;
1999
2000 // If the DAG is too complex, group the independent loads/stores with a
2001 // heuristic instead, to avoid a compile-time blow-up.
2002 // Note that some fusion pairs may be lost this way.
2003 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2004 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2005
2006 for (auto &Group : Groups) {
2007 // Sort the loads/stores so that we can stop clustering as early as
2008 // possible.
2009 llvm::sort(Group.second);
2010
2011 // Try to cluster all the neighboring loads/stores.
2012 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2013 }
2014}
2015
2016//===----------------------------------------------------------------------===//
2017// CopyConstrain - DAG post-processing to encourage copy elimination.
2018//===----------------------------------------------------------------------===//
2019
2020namespace {
2021
2022/// Post-process the DAG to create weak edges from all uses of a copy to
2023/// the one use that defines the copy's source vreg, most likely an induction
2024/// variable increment.
2025class CopyConstrain : public ScheduleDAGMutation {
2026 // Transient state.
2027 SlotIndex RegionBeginIdx;
2028
2029 // RegionEndIdx is the slot index of the last non-debug instruction in the
2030 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2031 SlotIndex RegionEndIdx;
2032
2033public:
2034 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2035
2036 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2037
2038protected:
2039 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2040};
2041
2042} // end anonymous namespace
2043
2044namespace llvm {
2045
2046std::unique_ptr<ScheduleDAGMutation>
2047 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2048 const TargetRegisterInfo *TRI) {
2049 return std::make_unique<CopyConstrain>(TII, TRI);
2050}
2051
2052} // end namespace llvm
2053
2054/// constrainLocalCopy handles two possibilities:
2055/// 1) Local src:
2056/// I0: = dst
2057/// I1: src = ...
2058/// I2: = dst
2059/// I3: dst = src (copy)
2060/// (create pred->succ edges I0->I1, I2->I1)
2061///
2062/// 2) Local copy:
2063/// I0: dst = src (copy)
2064/// I1: = dst
2065/// I2: src = ...
2066/// I3: = dst
2067/// (create pred->succ edges I1->I2, I3->I2)
2068///
2069/// Although the MachineScheduler is currently constrained to single blocks,
2070/// this algorithm should handle extended blocks. An EBB is a set of
2071/// contiguously numbered blocks such that the previous block in the EBB is
2072/// always the single predecessor.
2073void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2074 LiveIntervals *LIS = DAG->getLIS();
2075 MachineInstr *Copy = CopySU->getInstr();
2076
2077 // Check for pure vreg copies.
2078 const MachineOperand &SrcOp = Copy->getOperand(1);
2079 Register SrcReg = SrcOp.getReg();
2080 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2081 return;
2082
2083 const MachineOperand &DstOp = Copy->getOperand(0);
2084 Register DstReg = DstOp.getReg();
2085 if (!DstReg.isVirtual() || DstOp.isDead())
2086 return;
2087
2088 // Check if either the dest or source is local. If it's live across a back
2089 // edge, it's not local. Note that if both vregs are live across the back
2090 // edge, we cannot successfully constrain the copy without cyclic scheduling.
2091 // If both the copy's source and dest are local live intervals, then we
2092 // should treat the dest as the global for the purpose of adding
2093 // constraints. This adds edges from source's other uses to the copy.
2094 unsigned LocalReg = SrcReg;
2095 unsigned GlobalReg = DstReg;
2096 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2097 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2098 LocalReg = DstReg;
2099 GlobalReg = SrcReg;
2100 LocalLI = &LIS->getInterval(LocalReg);
2101 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2102 return;
2103 }
2104 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2105
2106 // Find the global segment after the start of the local LI.
2107 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2108 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2109 // local live range. We could create edges from other global uses to the local
2110 // start, but the coalescer should have already eliminated these cases, so
2111 // don't bother dealing with it.
2112 if (GlobalSegment == GlobalLI->end())
2113 return;
2114
2115 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2116 // returned the next global segment. But if GlobalSegment overlaps with
2117 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2118 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2119 if (GlobalSegment->contains(LocalLI->beginIndex()))
2120 ++GlobalSegment;
2121
2122 if (GlobalSegment == GlobalLI->end())
2123 return;
2124
2125 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2126 if (GlobalSegment != GlobalLI->begin()) {
2127 // Two address defs have no hole.
2128 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2129 GlobalSegment->start)) {
2130 return;
2131 }
2132 // If the prior global segment may be defined by the same two-address
2133 // instruction that also defines LocalLI, then can't make a hole here.
2134 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2135 LocalLI->beginIndex())) {
2136 return;
2137 }
2138 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2139 // it would be a disconnected component in the live range.
2140 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2141 "Disconnected LRG within the scheduling region.");
2142 }
2143 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2144 if (!GlobalDef)
2145 return;
2146
2147 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2148 if (!GlobalSU)
2149 return;
2150
2151 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2152 // constraining the uses of the last local def to precede GlobalDef.
2153 SmallVector<SUnit*,8> LocalUses;
2154 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2155 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2156 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2157 for (const SDep &Succ : LastLocalSU->Succs) {
2158 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2159 continue;
2160 if (Succ.getSUnit() == GlobalSU)
2161 continue;
2162 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2163 return;
2164 LocalUses.push_back(Succ.getSUnit());
2165 }
2166 // Open the top of the GlobalLI hole by constraining any earlier global uses
2167 // to precede the start of LocalLI.
2168 SmallVector<SUnit*,8> GlobalUses;
2169 MachineInstr *FirstLocalDef =
2170 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2171 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2172 for (const SDep &Pred : GlobalSU->Preds) {
2173 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2174 continue;
2175 if (Pred.getSUnit() == FirstLocalSU)
2176 continue;
2177 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2178 return;
2179 GlobalUses.push_back(Pred.getSUnit());
2180 }
2181 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2182 // Add the weak edges.
2183 for (SUnit *LU : LocalUses) {
2184 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2185 << GlobalSU->NodeNum << ")\n");
2186 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2187 }
2188 for (SUnit *GU : GlobalUses) {
2189 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2190 << FirstLocalSU->NodeNum << ")\n");
2191 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2192 }
2193}
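// [Illustrative note, editorial; not part of the LLVM source.] For pattern
// (1) in the comment above, the weak edges steer I0 and I2 (the uses of
// dst) to issue before I1 (the def of src). The live range of src then
// fits inside a hole of dst's live range, so the register allocator can
// coalesce the copy away.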
2194
2195/// Callback from DAG postProcessing to create weak edges to encourage
2196/// copy elimination.
2197void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2198 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2199 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2200
2201 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2202 if (FirstPos == DAG->end())
2203 return;
2204 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2205 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2206 *priorNonDebug(DAG->end(), DAG->begin()));
2207
2208 for (SUnit &SU : DAG->SUnits) {
2209 if (!SU.getInstr()->isCopy())
2210 continue;
2211
2212 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2213 }
2214}
2215
2216//===----------------------------------------------------------------------===//
2217// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2218// and possibly other custom schedulers.
2219//===----------------------------------------------------------------------===//
2220
2221static const unsigned InvalidCycle = ~0U;
2222
2223 SchedBoundary::~SchedBoundary() { delete HazardRec; }
2224
2225/// Given a Count of resource usage and a Latency value, return true if a
2226/// SchedBoundary becomes resource limited.
2227/// If we are checking after scheduling a node, we should return true when
2228/// we just reach the resource limit.
2229static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2230 unsigned Latency, bool AfterSchedNode) {
2231 int ResCntFactor = (int)(Count - (Latency * LFactor));
2232 if (AfterSchedNode)
2233 return ResCntFactor >= (int)LFactor;
2234 else
2235 return ResCntFactor > (int)LFactor;
2236}
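// [Numeric example, editorial; not part of the LLVM source.] With
// LFactor = 2, Count = 10 and Latency = 4, ResCntFactor = 10 - 8 = 2.
// After scheduling a node (AfterSchedNode == true) the zone counts as
// resource limited because 2 >= 2; before scheduling it would not, since
// the comparison is then strict.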
2237
2238 void SchedBoundary::reset() {
2239 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2240 // Destroying and reconstructing it is very expensive though. So keep
2241 // invalid, placeholder HazardRecs.
2242 if (HazardRec && HazardRec->isEnabled()) {
2243 delete HazardRec;
2244 HazardRec = nullptr;
2245 }
2246 Available.clear();
2247 Pending.clear();
2248 CheckPending = false;
2249 CurrCycle = 0;
2250 CurrMOps = 0;
2251 MinReadyCycle = std::numeric_limits<unsigned>::max();
2252 ExpectedLatency = 0;
2253 DependentLatency = 0;
2254 RetiredMOps = 0;
2255 MaxExecutedResCount = 0;
2256 ZoneCritResIdx = 0;
2257 IsResourceLimited = false;
2258 ReservedCycles.clear();
2259 ReservedResourceSegments.clear();
2260 ReservedCyclesIndex.clear();
2261 ResourceGroupSubUnitMasks.clear();
2262#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2263 // Track the maximum number of stall cycles that could arise either from the
2264 // latency of a DAG edge or the number of cycles that a processor resource is
2265 // reserved (SchedBoundary::ReservedCycles).
2266 MaxObservedStall = 0;
2267#endif
2268 // Reserve a zero-count for invalid CritResIdx.
2269 ExecutedResCounts.resize(1);
2270 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2271}
2272
2273 void SchedRemainder::
2274 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2275 reset();
2276 if (!SchedModel->hasInstrSchedModel())
2277 return;
2278 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2279 for (SUnit &SU : DAG->SUnits) {
2280 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2281 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2282 * SchedModel->getMicroOpFactor();
2283 for (TargetSchedModel::ProcResIter
2284 PI = SchedModel->getWriteProcResBegin(SC),
2285 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2286 unsigned PIdx = PI->ProcResourceIdx;
2287 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2288 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2289 RemainingCounts[PIdx] +=
2290 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2291 }
2292 }
2293}
2294
2295 void SchedBoundary::
2296 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2297 reset();
2298 DAG = dag;
2299 SchedModel = smodel;
2300 Rem = rem;
2301 if (SchedModel->hasInstrSchedModel()) {
2302 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2303 ReservedCyclesIndex.resize(ResourceCount);
2304 ExecutedResCounts.resize(ResourceCount);
2305 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2306 unsigned NumUnits = 0;
2307
2308 for (unsigned i = 0; i < ResourceCount; ++i) {
2309 ReservedCyclesIndex[i] = NumUnits;
2310 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2311 if (isUnbufferedGroup(i)) {
2312 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2313 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2314 U != UE; ++U)
2315 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2316 }
2317 }
2318
2319 ReservedCycles.resize(NumUnits, InvalidCycle);
2320 }
2321}
2322
2323/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2324/// these "soft stalls" differently than the hard stall cycles based on CPU
2325/// resources and computed by checkHazard(). A fully in-order model
2326/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2327/// available for scheduling until they are ready. However, a weaker in-order
2328/// model may use this for heuristics. For example, if a processor has in-order
2329/// behavior when reading certain resources, this may come into play.
2330 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2331 if (!SU->isUnbuffered)
2332 return 0;
2333
2334 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2335 if (ReadyCycle > CurrCycle)
2336 return ReadyCycle - CurrCycle;
2337 return 0;
2338}
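// [Numeric example, editorial; not part of the LLVM source.] In a
// bottom-up zone at CurrCycle = 5, an unbuffered SU with BotReadyCycle = 7
// reports 7 - 5 = 2 soft-stall cycles; buffered SUs always report 0 and
// rely on checkHazard() instead.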
2339
2340/// Compute the next cycle at which the given processor resource unit
2341/// can be scheduled.
2342 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2343 unsigned ReleaseAtCycle,
2344 unsigned AcquireAtCycle) {
2345 if (SchedModel && SchedModel->enableIntervals()) {
2346 if (isTop())
2347 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2348 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2349
2350 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2351 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2352 }
2353
2354 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2355 // If this resource has never been used, always return cycle zero.
2356 if (NextUnreserved == InvalidCycle)
2357 return CurrCycle;
2358 // For bottom-up scheduling add the cycles needed for the current operation.
2359 if (!isTop())
2360 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2361 return NextUnreserved;
2362}
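// [Numeric example, editorial; not part of the LLVM source.] Bottom-up,
// with ReservedCycles[InstanceIdx] = 3, CurrCycle = 2 and
// ReleaseAtCycle = 2: NextUnreserved = max(2, 3 + 2) = 5, so the unit is
// booked again no earlier than cycle 5. A never-used instance
// (InvalidCycle) is available immediately at CurrCycle.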
2363
2364/// Compute the next cycle at which the given processor resource can be
2365/// scheduled. Returns the next cycle and the index of the processor resource
2366/// instance in the reserved cycles vector.
2367std::pair<unsigned, unsigned>
2368 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2369 unsigned ReleaseAtCycle,
2370 unsigned AcquireAtCycle) {
2371 if (MischedDetailResourceBooking) {
2372 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2373 LLVM_DEBUG(dumpReservedCycles());
2374 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2375 }
2376 unsigned MinNextUnreserved = InvalidCycle;
2377 unsigned InstanceIdx = 0;
2378 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2379 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2380 assert(NumberOfInstances > 0 &&
2381 "Cannot have zero instances of a ProcResource");
2382
2383 if (isUnbufferedGroup(PIdx)) {
2384 // If any subunits are used by the instruction, report that the
2385 // subunits of the resource group are available at the first cycle
2386 // in which the unit is available, effectively removing the group
2387 // record from hazarding and basing the hazarding decisions on the
2388 // subunit records. Otherwise, choose the first available instance
2389 // from among the subunits. Specifications which assign cycles to
2390 // both the subunits and the group or which use an unbuffered
2391 // group with buffered subunits will appear to schedule
2392 // strangely. In the first case, the additional cycles for the
2393 // group will be ignored. In the second, the group will be
2394 // ignored entirely.
2395 for (const MCWriteProcResEntry &PE :
2396 make_range(SchedModel->getWriteProcResBegin(SC),
2397 SchedModel->getWriteProcResEnd(SC)))
2398 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2399 return std::make_pair(getNextResourceCycleByInstance(
2400 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2401 StartIndex);
2402
2403 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2404 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2405 unsigned NextUnreserved, NextInstanceIdx;
2406 std::tie(NextUnreserved, NextInstanceIdx) =
2407 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2408 if (MinNextUnreserved > NextUnreserved) {
2409 InstanceIdx = NextInstanceIdx;
2410 MinNextUnreserved = NextUnreserved;
2411 }
2412 }
2413 return std::make_pair(MinNextUnreserved, InstanceIdx);
2414 }
2415
2416 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2417 ++I) {
2418 unsigned NextUnreserved =
2419 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2420 if (MischedDetailResourceBooking)
2421 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2422 << NextUnreserved << "c\n");
2423 if (MinNextUnreserved > NextUnreserved) {
2424 InstanceIdx = I;
2425 MinNextUnreserved = NextUnreserved;
2426 }
2427 }
2428 if (MischedDetailResourceBooking)
2429 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2430 << "[" << InstanceIdx - StartIndex << "]"
2431 << " available @" << MinNextUnreserved << "c"
2432 << "\n");
2433 return std::make_pair(MinNextUnreserved, InstanceIdx);
2434}
2435
2436/// Does this SU have a hazard within the current instruction group.
2437///
2438/// The scheduler supports two modes of hazard recognition. The first is the
2439/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2440/// supports highly complicated in-order reservation tables
2441/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2442///
2443/// The second is a streamlined mechanism that checks for hazards based on
2444/// simple counters that the scheduler itself maintains. It explicitly checks
2445/// for instruction dispatch limitations, including the number of micro-ops that
2446/// can dispatch per cycle.
2447///
2448/// TODO: Also check whether the SU must start a new group.
2449 bool SchedBoundary::checkHazard(SUnit *SU) {
2450 if (HazardRec->isEnabled()
2451 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2452 return true;
2453 }
2454
2455 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2456 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2457 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2458 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2459 return true;
2460 }
2461
2462 if (CurrMOps > 0 &&
2463 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2464 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2465 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2466 << (isTop() ? "begin" : "end") << " group\n");
2467 return true;
2468 }
2469
2470 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2471 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2472 for (const MCWriteProcResEntry &PE :
2473 make_range(SchedModel->getWriteProcResBegin(SC),
2474 SchedModel->getWriteProcResEnd(SC))) {
2475 unsigned ResIdx = PE.ProcResourceIdx;
2476 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2477 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2478 unsigned NRCycle, InstanceIdx;
2479 std::tie(NRCycle, InstanceIdx) =
2480 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2481 if (NRCycle > CurrCycle) {
2482#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2483 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2484#endif
2485 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2486 << SchedModel->getResourceName(ResIdx)
2487 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2488 << "=" << NRCycle << "c\n");
2489 return true;
2490 }
2491 }
2492 }
2493 return false;
2494}
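// [Numeric example, editorial; not part of the LLVM source.] With
// IssueWidth = 4 and CurrMOps = 3, an SU decoding to 2 micro-ops trips
// the (CurrMOps + uops > IssueWidth) test above and is deferred to the
// next cycle, even when no target hazard recognizer is enabled.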
2495
2496// Find the unscheduled node in ReadySUs with the highest latency.
2497 unsigned SchedBoundary::
2498 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2499 SUnit *LateSU = nullptr;
2500 unsigned RemLatency = 0;
2501 for (SUnit *SU : ReadySUs) {
2502 unsigned L = getUnscheduledLatency(SU);
2503 if (L > RemLatency) {
2504 RemLatency = L;
2505 LateSU = SU;
2506 }
2507 }
2508 if (LateSU) {
2509 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2510 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2511 }
2512 return RemLatency;
2513}
2514
2515// Count resources in this zone and the remaining unscheduled
2516// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2517// resource index, or zero if the zone is issue limited.
2518 unsigned SchedBoundary::
2519 getOtherResourceCount(unsigned &OtherCritIdx) {
2520 OtherCritIdx = 0;
2521 if (!SchedModel->hasInstrSchedModel())
2522 return 0;
2523
2524 unsigned OtherCritCount = Rem->RemIssueCount
2525 + (RetiredMOps * SchedModel->getMicroOpFactor());
2526 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2527 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2528 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2529 PIdx != PEnd; ++PIdx) {
2530 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2531 if (OtherCount > OtherCritCount) {
2532 OtherCritCount = OtherCount;
2533 OtherCritIdx = PIdx;
2534 }
2535 }
2536 if (OtherCritIdx) {
2537 LLVM_DEBUG(
2538 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2539 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2540 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2541 }
2542 return OtherCritCount;
2543}
2544
2545void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2546 unsigned Idx) {
2547 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2548
2549#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2550 // ReadyCycle was been bumped up to the CurrCycle when this node was
2551 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2552 // scheduling, so may now be greater than ReadyCycle.
2553 if (ReadyCycle > CurrCycle)
2554 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2555#endif
2556
2557 if (ReadyCycle < MinReadyCycle)
2558 MinReadyCycle = ReadyCycle;
2559
2560 // Check for interlocks first. For the purpose of other heuristics, an
2561 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2562 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2563 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2564 checkHazard(SU) || (Available.size() >= ReadyListLimit);
2565
2566 if (!HazardDetected) {
2567 Available.push(SU);
2568
2569 if (InPQueue)
2570 Pending.remove(Pending.begin() + Idx);
2571 return;
2572 }
2573
2574 if (!InPQueue)
2575 Pending.push(SU);
2576}
2577
2578/// Move the boundary of scheduled code by one cycle.
2579void SchedBoundary::bumpCycle(unsigned NextCycle) {
2580 if (SchedModel->getMicroOpBufferSize() == 0) {
2581 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2582 "MinReadyCycle uninitialized");
2583 if (MinReadyCycle > NextCycle)
2584 NextCycle = MinReadyCycle;
2585 }
2586 // Update the current micro-ops, which will issue in the next cycle.
2587 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2588 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2589
2590 // Decrement DependentLatency based on the next cycle.
2591 if ((NextCycle - CurrCycle) > DependentLatency)
2592 DependentLatency = 0;
2593 else
2594 DependentLatency -= (NextCycle - CurrCycle);
2595
2596 if (!HazardRec->isEnabled()) {
2597 // Bypass HazardRec virtual calls.
2598 CurrCycle = NextCycle;
2599 } else {
2600 // Bypass getHazardType calls in case of long latency.
2601 for (; CurrCycle != NextCycle; ++CurrCycle) {
2602 if (isTop())
2603 HazardRec->AdvanceCycle();
2604 else
2605 HazardRec->RecedeCycle();
2606 }
2607 }
2608 CheckPending = true;
2609 IsResourceLimited =
2610 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2611 getScheduledLatency(), true);
2612
2613 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2614 << '\n');
2615}
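// [Numeric example, editorial; not part of the LLVM source.] Advancing
// from CurrCycle = 3 to NextCycle = 5 with IssueWidth = 2 gives
// DecMOps = 2 * (5 - 3) = 4: up to 4 pending micro-ops are considered
// issued, and DependentLatency shrinks by the same two cycles.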
2616
2617void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2618 ExecutedResCounts[PIdx] += Count;
2619 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2620 MaxExecutedResCount = ExecutedResCounts[PIdx];
2621}
2622
2623/// Add the given processor resource to this scheduled zone.
2624///
2625/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2626/// cycles during which this resource is released.
2627///
2628/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2629 /// cycles at which the resource is acquired after issue (assuming no stalls).
2630///
2631/// \return the next cycle at which the instruction may execute without
2632/// oversubscribing resources.
2633unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2634 unsigned ReleaseAtCycle,
2635 unsigned NextCycle,
2636 unsigned AcquireAtCycle) {
2637 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2638 unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2639 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2640 << ReleaseAtCycle << "x" << Factor << "u\n");
2641
2642 // Update Executed resources counts.
2643 incExecutedResources(PIdx, Count);
2644 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2645 Rem->RemainingCounts[PIdx] -= Count;
2646
2647 // Check if this resource exceeds the current critical resource. If so, it
2648 // becomes the critical resource.
2649 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2650 ZoneCritResIdx = PIdx;
2651 LLVM_DEBUG(dbgs() << " *** Critical resource "
2652 << SchedModel->getResourceName(PIdx) << ": "
2653 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2654 << "c\n");
2655 }
2656 // For reserved resources, record the highest cycle using the resource.
2657 unsigned NextAvailable, InstanceIdx;
2658 std::tie(NextAvailable, InstanceIdx) =
2659 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2660 if (NextAvailable > CurrCycle) {
2661 LLVM_DEBUG(dbgs() << " Resource conflict: "
2662 << SchedModel->getResourceName(PIdx)
2663 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2664 << " reserved until @" << NextAvailable << "\n");
2665 }
2666 return NextAvailable;
2667}
2668
2669/// Move the boundary of scheduled code by one SUnit.
2670 void SchedBoundary::bumpNode(SUnit *SU) {
2671 // Update the reservation table.
2672 if (HazardRec->isEnabled()) {
2673 if (!isTop() && SU->isCall) {
2674 // Calls are scheduled with their preceding instructions. For bottom-up
2675 // scheduling, clear the pipeline state before emitting.
2676 HazardRec->Reset();
2677 }
2678 HazardRec->EmitInstruction(SU);
2679 // Scheduling an instruction may have made pending instructions available.
2680 CheckPending = true;
2681 }
2682 // checkHazard should prevent scheduling multiple instructions per cycle that
2683 // exceed the issue width.
2684 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2685 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2686 assert(
2687 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2688 "Cannot schedule this instruction's MicroOps in the current cycle.");
2689
2690 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2691 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2692
2693 unsigned NextCycle = CurrCycle;
2694 switch (SchedModel->getMicroOpBufferSize()) {
2695 case 0:
2696 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2697 break;
2698 case 1:
2699 if (ReadyCycle > NextCycle) {
2700 NextCycle = ReadyCycle;
2701 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2702 }
2703 break;
2704 default:
2705 // We don't currently model the OOO reorder buffer, so consider all
2706 // scheduled MOps to be "retired". We do loosely model in-order resource
2707 // latency. If this instruction uses an in-order resource, account for any
2708 // likely stall cycles.
2709 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2710 NextCycle = ReadyCycle;
2711 break;
2712 }
2713 RetiredMOps += IncMOps;
2714
2715 // Update resource counts and critical resource.
2716 if (SchedModel->hasInstrSchedModel()) {
2717 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2718 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2719 Rem->RemIssueCount -= DecRemIssue;
2720 if (ZoneCritResIdx) {
2721 // Scale scheduled micro-ops for comparing with the critical resource.
2722 unsigned ScaledMOps =
2723 RetiredMOps * SchedModel->getMicroOpFactor();
2724
2725 // If scaled micro-ops are now more than the previous critical resource by
2726 // a full cycle, then micro-ops issue becomes critical.
2727 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2728 >= (int)SchedModel->getLatencyFactor()) {
2729 ZoneCritResIdx = 0;
2730 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2731 << ScaledMOps / SchedModel->getLatencyFactor()
2732 << "c\n");
2733 }
2734 }
2735 for (TargetSchedModel::ProcResIter
2736 PI = SchedModel->getWriteProcResBegin(SC),
2737 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2738 unsigned RCycle =
2739 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2740 PI->AcquireAtCycle);
2741 if (RCycle > NextCycle)
2742 NextCycle = RCycle;
2743 }
2744 if (SU->hasReservedResource) {
2745 // For reserved resources, record the highest cycle using the resource.
2746 // For top-down scheduling, this is the cycle in which we schedule this
2747 // instruction plus the number of cycles the operation reserves the
2748 // resource. For bottom-up it is simply the instruction's cycle.
2749 for (TargetSchedModel::ProcResIter
2750 PI = SchedModel->getWriteProcResBegin(SC),
2751 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2752 unsigned PIdx = PI->ProcResourceIdx;
2753 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2754
2755 if (SchedModel && SchedModel->enableIntervals()) {
2756 unsigned ReservedUntil, InstanceIdx;
2757 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2758 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2759 if (isTop()) {
2760 ReservedResourceSegments[InstanceIdx].add(
2761 ResourceSegments::getResourceIntervalTop(
2762 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2763 MIResourceCutOff);
2764 } else {
2765 ReservedResourceSegments[InstanceIdx].add(
2766 ResourceSegments::getResourceIntervalBottom(
2767 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2768 MIResourceCutOff);
2769 }
2770 } else {
2771
2772 unsigned ReservedUntil, InstanceIdx;
2773 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2774 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2775 if (isTop()) {
2776 ReservedCycles[InstanceIdx] =
2777 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2778 } else
2779 ReservedCycles[InstanceIdx] = NextCycle;
2780 }
2781 }
2782 }
2783 }
2784 }
2785 // Update ExpectedLatency and DependentLatency.
2786 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2787 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2788 if (SU->getDepth() > TopLatency) {
2789 TopLatency = SU->getDepth();
2790 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2791 << SU->NodeNum << ") " << TopLatency << "c\n");
2792 }
2793 if (SU->getHeight() > BotLatency) {
2794 BotLatency = SU->getHeight();
2795 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2796 << SU->NodeNum << ") " << BotLatency << "c\n");
2797 }
2798 // If we stall for any reason, bump the cycle.
2799 if (NextCycle > CurrCycle)
2800 bumpCycle(NextCycle);
2801 else
2802 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2803 // resource limited. If a stall occurred, bumpCycle does this.
2804 IsResourceLimited =
2805 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2806 getScheduledLatency(), true);
2807
2808 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2809 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2810 // one cycle. Since we commonly reach the max MOps here, opportunistically
2811 // bump the cycle to avoid uselessly checking everything in the readyQ.
2812 CurrMOps += IncMOps;
2813
2814 // Bump the cycle count for issue group constraints.
2815 // This must be done after NextCycle has been adjusted for all other stalls.
2816 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2817 // currCycle to X.
2818 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2819 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2820 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2821 << " group\n");
2822 bumpCycle(++NextCycle);
2823 }
2824
2825 while (CurrMOps >= SchedModel->getIssueWidth()) {
2826 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2827 << CurrCycle << '\n');
2828 bumpCycle(++NextCycle);
2829 }
2830 LLVM_DEBUG(dumpScheduledState(); dbgs() << "\n");
2831}
2832
2833/// Release pending ready nodes in to the available queue. This makes them
2834/// visible to heuristics.
2835 void SchedBoundary::releasePending() {
2836 // If the available queue is empty, it is safe to reset MinReadyCycle.
2837 if (Available.empty())
2838 MinReadyCycle = std::numeric_limits<unsigned>::max();
2839
2840 // Check to see if any of the pending instructions are ready to issue. If
2841 // so, add them to the available queue.
2842 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2843 SUnit *SU = *(Pending.begin() + I);
2844 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2845
2846 if (ReadyCycle < MinReadyCycle)
2847 MinReadyCycle = ReadyCycle;
2848
2849 if (Available.size() >= ReadyListLimit)
2850 break;
2851
2852 releaseNode(SU, ReadyCycle, true, I);
2853 if (E != Pending.size()) {
2854 --I;
2855 --E;
2856 }
2857 }
2858 CheckPending = false;
2859}
2860
2861/// Remove SU from the ready set for this boundary.
2862 void SchedBoundary::removeReady(SUnit *SU) {
2863 if (Available.isInQueue(SU))
2864 Available.remove(Available.find(SU));
2865 else {
2866 assert(Pending.isInQueue(SU) && "bad ready count");
2867 Pending.remove(Pending.find(SU));
2868 }
2869}
2870
2871/// If this queue only has one ready candidate, return it. As a side effect,
2872/// defer any nodes that now hit a hazard, and advance the cycle until at least
2873/// one node is ready. If multiple instructions are ready, return NULL.
2874 SUnit *SchedBoundary::pickOnlyChoice() {
2875 if (CheckPending)
2876 releasePending();
2877
2878 // Defer any ready instrs that now have a hazard.
2879 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2880 if (checkHazard(*I)) {
2881 Pending.push(*I);
2882 I = Available.remove(I);
2883 continue;
2884 }
2885 ++I;
2886 }
2887 for (unsigned i = 0; Available.empty(); ++i) {
2888// FIXME: Re-enable assert once PR20057 is resolved.
2889// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2890// "permanent hazard");
2891 (void)i;
2892 bumpCycle(CurrCycle + 1);
2893 releasePending();
2894 }
2895
2896 LLVM_DEBUG(Pending.dump());
2897 LLVM_DEBUG(Available.dump());
2898
2899 if (Available.size() == 1)
2900 return *Available.begin();
2901 return nullptr;
2902}
2903
2904#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2905
2906/// Dump the content of the \ref ReservedCycles vector for the
2907/// resources that are used in the basic block.
2908///
2909 LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2910 if (!SchedModel->hasInstrSchedModel())
2911 return;
2912
2913 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2914 unsigned StartIdx = 0;
2915
2916 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2917 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2918 std::string ResName = SchedModel->getResourceName(ResIdx);
2919 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2920 dbgs() << ResName << "(" << UnitIdx << ") = ";
2921 if (SchedModel && SchedModel->enableIntervals()) {
2922 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2923 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2924 else
2925 dbgs() << "{ }\n";
2926 } else
2927 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2928 }
2929 StartIdx += NumUnits;
2930 }
2931}
2932
2933// This is useful information to dump after bumpNode.
2934// Note that the Queue contents are more useful before pickNodeFromQueue.
2936 unsigned ResFactor;
2937 unsigned ResCount;
2938 if (ZoneCritResIdx) {
2939 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2940 ResCount = getResourceCount(ZoneCritResIdx);
2941 } else {
2942 ResFactor = SchedModel->getMicroOpFactor();
2943 ResCount = RetiredMOps * ResFactor;
2944 }
2945 unsigned LFactor = SchedModel->getLatencyFactor();
2946 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2947 << " Retired: " << RetiredMOps;
2948 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2949 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2950 << ResCount / ResFactor << " "
2951 << SchedModel->getResourceName(ZoneCritResIdx)
2952 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2953 << (IsResourceLimited ? " - Resource" : " - Latency")
2954 << " limited.\n";
2955 if (MISchedDumpReservedCycles)
2956 dumpReservedCycles();
2957}
2958#endif
2959
2960//===----------------------------------------------------------------------===//
2961// GenericScheduler - Generic implementation of MachineSchedStrategy.
2962//===----------------------------------------------------------------------===//
2963
2964 void GenericSchedulerBase::SchedCandidate::
2965 initResourceDelta(const ScheduleDAGMI *DAG,
2966 const TargetSchedModel *SchedModel) {
2967 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2968 return;
2969
2970 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2971 for (TargetSchedModel::ProcResIter
2972 PI = SchedModel->getWriteProcResBegin(SC),
2973 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2974 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2975 ResDelta.CritResources += PI->ReleaseAtCycle;
2976 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2977 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2978 }
2979}
2980
2981/// Compute remaining latency. We need this both to determine whether the
2982/// overall schedule has become latency-limited and whether the instructions
2983/// outside this zone are resource or latency limited.
2984///
2985/// The "dependent" latency is updated incrementally during scheduling as the
2986/// max height/depth of scheduled nodes minus the cycles since it was
2987/// scheduled:
2988/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2989///
2990/// The "independent" latency is the max ready queue depth:
2991/// ILat = max N.depth for N in Available|Pending
2992///
2993/// RemainingLatency is the greater of independent and dependent latency.
2994///
2995/// These computations are expensive, especially in DAGs with many edges, so
2996/// only do them if necessary.
2997static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2998 unsigned RemLatency = CurrZone.getDependentLatency();
2999 RemLatency = std::max(RemLatency,
3000 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3001 RemLatency = std::max(RemLatency,
3002 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3003 return RemLatency;
3004}
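// [Numeric example, editorial; not part of the LLVM source.] If the
// incrementally tracked dependent latency is 8c but the deepest node
// still sitting in Available or Pending has an unscheduled latency of
// 10c, then RemainingLatency = max(8, 10) = 10c.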
3005
3006 /// Returns true if the current cycle plus remaining latency is greater than
3007/// the critical path in the scheduling region.
3008bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3009 SchedBoundary &CurrZone,
3010 bool ComputeRemLatency,
3011 unsigned &RemLatency) const {
3012 // The current cycle is already greater than the critical path, so we are
3013 // already latency limited and don't need to compute the remaining latency.
3014 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3015 return true;
3016
3017 // If we haven't scheduled anything yet, then we aren't latency limited.
3018 if (CurrZone.getCurrCycle() == 0)
3019 return false;
3020
3021 if (ComputeRemLatency)
3022 RemLatency = computeRemLatency(CurrZone);
3023
3024 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3025}
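// [Numeric example, editorial; not part of the LLVM source.] With
// Rem.CriticalPath = 20, CurrCycle = 8 and RemLatency = 14, the test
// 8 + 14 > 20 holds, so the zone is latency limited and setPolicy()
// below will set Policy.ReduceLatency.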
3026
3027/// Set the CandPolicy given a scheduling zone given the current resources and
3028/// latencies inside and outside the zone.
3029 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3030 SchedBoundary &CurrZone,
3031 SchedBoundary *OtherZone) {
3032 // Apply preemptive heuristics based on the total latency and resources
3033 // inside and outside this zone. Potential stalls should be considered before
3034 // following this policy.
3035
3036 // Compute the critical resource outside the zone.
3037 unsigned OtherCritIdx = 0;
3038 unsigned OtherCount =
3039 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3040
3041 bool OtherResLimited = false;
3042 unsigned RemLatency = 0;
3043 bool RemLatencyComputed = false;
3044 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3045 RemLatency = computeRemLatency(CurrZone);
3046 RemLatencyComputed = true;
3047 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3048 OtherCount, RemLatency, false);
3049 }
3050
3051 // Schedule aggressively for latency in PostRA mode. We don't check for
3052 // acyclic latency during PostRA, and highly out-of-order processors will
3053 // skip PostRA scheduling.
3054 if (!OtherResLimited &&
3055 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3056 RemLatency))) {
3057 Policy.ReduceLatency |= true;
3058 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3059 << " RemainingLatency " << RemLatency << " + "
3060 << CurrZone.getCurrCycle() << "c > CritPath "
3061 << Rem.CriticalPath << "\n");
3062 }
3063 // If the same resource is limiting inside and outside the zone, do nothing.
3064 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3065 return;
3066
3067 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3068 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3069 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3070 } if (OtherResLimited) dbgs()
3071 << " RemainingLimit: "
3072 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3073 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3074 << " Latency limited both directions.\n");
3075
3076 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3077 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3078
3079 if (OtherResLimited)
3080 Policy.DemandResIdx = OtherCritIdx;
3081}
3082
3083#ifndef NDEBUG
3084 const char *GenericSchedulerBase::getReasonStr(
3085 GenericSchedulerBase::CandReason Reason) {
3086 switch (Reason) {
3087 case NoCand: return "NOCAND ";
3088 case Only1: return "ONLY1 ";
3089 case PhysReg: return "PHYS-REG ";
3090 case RegExcess: return "REG-EXCESS";
3091 case RegCritical: return "REG-CRIT ";
3092 case Stall: return "STALL ";
3093 case Cluster: return "CLUSTER ";
3094 case Weak: return "WEAK ";
3095 case RegMax: return "REG-MAX ";
3096 case ResourceReduce: return "RES-REDUCE";
3097 case ResourceDemand: return "RES-DEMAND";
3098 case TopDepthReduce: return "TOP-DEPTH ";
3099 case TopPathReduce: return "TOP-PATH ";
3100 case BotHeightReduce:return "BOT-HEIGHT";
3101 case BotPathReduce: return "BOT-PATH ";
3102 case NextDefUse: return "DEF-USE ";
3103 case NodeOrder: return "ORDER ";
3104 };
3105 llvm_unreachable("Unknown reason!");
3106}
3107
3108 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3109 PressureChange P;
3110 unsigned ResIdx = 0;
3111 unsigned Latency = 0;
3112 switch (Cand.Reason) {
3113 default:
3114 break;
3115 case RegExcess:
3116 P = Cand.RPDelta.Excess;
3117 break;
3118 case RegCritical:
3119 P = Cand.RPDelta.CriticalMax;
3120 break;
3121 case RegMax:
3122 P = Cand.RPDelta.CurrentMax;
3123 break;
3124 case ResourceReduce:
3125 ResIdx = Cand.Policy.ReduceResIdx;
3126 break;
3127 case ResourceDemand:
3128 ResIdx = Cand.Policy.DemandResIdx;
3129 break;
3130 case TopDepthReduce:
3131 Latency = Cand.SU->getDepth();
3132 break;
3133 case TopPathReduce:
3134 Latency = Cand.SU->getHeight();
3135 break;
3136 case BotHeightReduce:
3137 Latency = Cand.SU->getHeight();
3138 break;
3139 case BotPathReduce:
3140 Latency = Cand.SU->getDepth();
3141 break;
3142 }
3143 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3144 if (P.isValid())
3145 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3146 << ":" << P.getUnitInc() << " ";
3147 else
3148 dbgs() << " ";
3149 if (ResIdx)
3150 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3151 else
3152 dbgs() << " ";
3153 if (Latency)
3154 dbgs() << " " << Latency << " cycles ";
3155 else
3156 dbgs() << " ";
3157 dbgs() << '\n';
3158}
3159#endif
3160
3161namespace llvm {
3162/// Return true if this heuristic determines order.
3163/// TODO: Consider refactor return type of these functions as integer or enum,
3164/// as we may need to differentiate whether TryCand is better than Cand.
3165 bool tryLess(int TryVal, int CandVal,
3166 GenericSchedulerBase::SchedCandidate &TryCand,
3167 GenericSchedulerBase::SchedCandidate &Cand,
3168 GenericSchedulerBase::CandReason Reason) {
3169 if (TryVal < CandVal) {
3170 TryCand.Reason = Reason;
3171 return true;
3172 }
3173 if (TryVal > CandVal) {
3174 if (Cand.Reason > Reason)
3175 Cand.Reason = Reason;
3176 return true;
3177 }
3178 return false;
3179}
3180
3181 bool tryGreater(int TryVal, int CandVal,
3182 GenericSchedulerBase::SchedCandidate &TryCand,
3183 GenericSchedulerBase::SchedCandidate &Cand,
3184 GenericSchedulerBase::CandReason Reason) {
3185 if (TryVal > CandVal) {
3186 TryCand.Reason = Reason;
3187 return true;
3188 }
3189 if (TryVal < CandVal) {
3190 if (Cand.Reason > Reason)
3191 Cand.Reason = Reason;
3192 return true;
3193 }
3194 return false;
3195}
3196
3197 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3198 GenericSchedulerBase::SchedCandidate &Cand,
3199 SchedBoundary &Zone) {
3200 if (Zone.isTop()) {
3201 // Prefer the candidate with the lesser depth, but only if one of them has
3202 // depth greater than the total latency scheduled so far, otherwise either
3203 // of them could be scheduled now with no stall.
3204 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3205 Zone.getScheduledLatency()) {
3206 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3207 TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3208 return true;
3209 }
3210 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3211 TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3212 return true;
3213 } else {
3214 // Prefer the candidate with the lesser height, but only if one of them has
3215 // height greater than the total latency scheduled so far, otherwise either
3216 // of them could be scheduled now with no stall.
3217 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3218 Zone.getScheduledLatency()) {
3219 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3220 TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3221 return true;
3222 }
3223 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3224 TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3225 return true;
3226 }
3227 return false;
3228}
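// [Numeric example, editorial; not part of the LLVM source.] Top-down
// with ScheduledLatency = 6c: candidates of depth 9 and 7 both exceed it,
// so tryLess() prefers the shallower one (7). If neither depth exceeded
// 6c, the comparison would fall through to heights, preferring the node
// on the longer remaining path.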
3229} // end namespace llvm
3230
3231static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3232 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3233 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3234}
3235
3236 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3237 tracePick(Cand.Reason, Cand.AtTop);
3238}
3239
3240 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3241 assert(dag->hasVRegLiveness() &&
3242 "(PreRA)GenericScheduler needs vreg liveness");
3243 DAG = static_cast<ScheduleDAGMILive*>(dag);
3244 SchedModel = DAG->getSchedModel();
3245 TRI = DAG->TRI;
3246
3247 if (RegionPolicy.ComputeDFSResult)
3248 DAG->computeDFSResult();
3249
3250 Rem.init(DAG, SchedModel);
3251 Top.init(DAG, SchedModel, &Rem);
3252 Bot.init(DAG, SchedModel, &Rem);
3253
3254 // Initialize resource counts.
3255
3256 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3257 // are disabled, then these HazardRecs will be disabled.
3258 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3259 if (!Top.HazardRec) {
3260 Top.HazardRec =
3261 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3262 Itin, DAG);
3263 }
3264 if (!Bot.HazardRec) {
3265 Bot.HazardRec =
3266 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3267 Itin, DAG);
3268 }
3269 TopCand.SU = nullptr;
3270 BotCand.SU = nullptr;
3271}
3272
3273/// Initialize the per-region scheduling policy.
3274 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3275 MachineBasicBlock::iterator End,
3276 unsigned NumRegionInstrs) {
3277 const MachineFunction &MF = *Begin->getMF();
3278 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3279
3280 // Avoid setting up the register pressure tracker for small regions to save
3281 // compile time. As a rough heuristic, only track pressure when the number of
3282 // schedulable instructions exceeds half the integer register file.
3283 RegionPolicy.ShouldTrackPressure = true;
3284 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
3285 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3286 if (TLI->isTypeLegal(LegalIntVT)) {
3287 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3288 TLI->getRegClassFor(LegalIntVT));
3289 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3290 }
3291 }
3292
3293 // For generic targets, we default to bottom-up, because it's simpler and more
3294 // compile-time optimizations have been implemented in that direction.
3295 RegionPolicy.OnlyBottomUp = true;
3296
3297 // Allow the subtarget to override default policy.
3298 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3299
3300 // After subtarget overrides, apply command line options.
3301 if (!EnableRegPressure) {
3302 RegionPolicy.ShouldTrackPressure = false;
3303 RegionPolicy.ShouldTrackLaneMasks = false;
3304 }
3305
3306 // Check -misched-topdown/bottomup can force or unforce scheduling direction.
3307 // e.g. -misched-bottomup=false allows scheduling in both directions.
3308 assert((!ForceTopDown || !ForceBottomUp) &&
3309 "-misched-topdown incompatible with -misched-bottomup");
3310 if (ForceBottomUp.getNumOccurrences() > 0) {
3311 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3312 if (RegionPolicy.OnlyBottomUp)
3313 RegionPolicy.OnlyTopDown = false;
3314 }
3315 if (ForceTopDown.getNumOccurrences() > 0) {
3316 RegionPolicy.OnlyTopDown = ForceTopDown;
3317 if (RegionPolicy.OnlyTopDown)
3318 RegionPolicy.OnlyBottomUp = false;
3319 }
3320}
3321
3322 void GenericScheduler::dumpPolicy() const {
3323 // Cannot completely remove virtual function even in release mode.
3324#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3325 dbgs() << "GenericScheduler RegionPolicy: "
3326 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3327 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3328 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3329 << "\n";
3330#endif
3331}
3332
3333/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3334/// critical path by more cycles than it takes to drain the instruction buffer.
3335 /// We estimate an upper bound on in-flight instructions as:
3336///
3337/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3338/// InFlightIterations = AcyclicPath / CyclesPerIteration
3339/// InFlightResources = InFlightIterations * LoopResources
3340///
3341/// TODO: Check execution resources in addition to IssueCount.
3342 void GenericScheduler::checkAcyclicLatency() {
3343 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3344 return;
3345
3346 // Scaled number of cycles per loop iteration.
3347 unsigned IterCount =
3348 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3349 Rem.RemIssueCount);
3350 // Scaled acyclic critical path.
3351 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3352 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3353 unsigned InFlightCount =
3354 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3355 unsigned BufferLimit =
3356 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3357
3358 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3359
3360 LLVM_DEBUG(
3361 dbgs() << "IssueCycles="
3362 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3363 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3364 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3365 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3366 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3367 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3368}
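// [Worked example, editorial; not part of the LLVM source.] Assume unit
// latency/micro-op factors, CyclicCritPath = 4c, RemIssueCount = 8 and
// CriticalPath = 16c. Then IterCount = max(4, 8) = 8, InFlightCount =
// (16 * 8 + 7) / 8 = 16, and with a 12-entry micro-op buffer
// (BufferLimit = 12) the loop is acyclic-latency limited since 16 > 12.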
3369
3370 void GenericScheduler::registerRoots() {
3371 Rem.CriticalPath = DAG->ExitSU.getDepth();
3372
3373 // Some roots may not feed into ExitSU. Check all of them in case.
3374 for (const SUnit *SU : Bot.Available) {
3375 if (SU->getDepth() > Rem.CriticalPath)
3376 Rem.CriticalPath = SU->getDepth();
3377 }
3378 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3379 if (DumpCriticalPathLength) {
3380 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3381 }
3382
3383 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3384 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3385 checkAcyclicLatency();
3386 }
3387}
3388
3389namespace llvm {
3390 bool tryPressure(const PressureChange &TryP,
3391 const PressureChange &CandP,
3392 GenericSchedulerBase::SchedCandidate &TryCand,
3393 GenericSchedulerBase::SchedCandidate &Cand,
3394 GenericSchedulerBase::CandReason Reason,
3395 const TargetRegisterInfo *TRI,
3396 const MachineFunction &MF) {
3397 // If one candidate decreases and the other increases, go with it.
3398 // Invalid candidates have UnitInc==0.
3399 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3400 Reason)) {
3401 return true;
3402 }
3403 // Do not compare the magnitude of pressure changes between top and bottom
3404 // boundary.
3405 if (Cand.AtTop != TryCand.AtTop)
3406 return false;
3407
3408 // If both candidates affect the same set in the same boundary, go with the
3409 // smallest increase.
3410 unsigned TryPSet = TryP.getPSetOrMax();
3411 unsigned CandPSet = CandP.getPSetOrMax();
3412 if (TryPSet == CandPSet) {
3413 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3414 Reason);
3415 }
3416
3417 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3418 std::numeric_limits<int>::max();
3419
3420 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3421 std::numeric_limits<int>::max();
3422
3423 // If the candidates are decreasing pressure, reverse priority.
3424 if (TryP.getUnitInc() < 0)
3425 std::swap(TryRank, CandRank);
3426 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3427}
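// [Numeric example, editorial; not part of the LLVM source.] If TryCand
// raises GPR pressure (UnitInc = +1) while Cand lowers it (UnitInc = -1),
// the initial tryGreater(TryP < 0, CandP < 0, ...) call resolves the
// comparison in Cand's favor at once; only when both touch the same set
// on the same boundary does the smaller increase win.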
3428
3429unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3430 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3431}
3432
3433/// Minimize physical register live ranges. Regalloc wants them adjacent to
3434/// their physreg def/use.
3435///
3436/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3437/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3438/// with the operation that produces or consumes the physreg. We'll do this when
3439/// regalloc has support for parallel copies.
3440int biasPhysReg(const SUnit *SU, bool isTop) {
3441 const MachineInstr *MI = SU->getInstr();
3442
3443 if (MI->isCopy()) {
3444 unsigned ScheduledOper = isTop ? 1 : 0;
3445 unsigned UnscheduledOper = isTop ? 0 : 1;
3446 // If we have already scheduled the physreg producer/consumer, immediately
3447 // schedule the copy.
3448 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3449 return 1;
3450 // If the physreg is at the boundary, defer it. Otherwise schedule it
3451 // immediately to free the dependent. We can hoist the copy later.
3452 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3453 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3454 return AtBoundary ? -1 : 1;
3455 }
3456
3457 if (MI->isMoveImmediate()) {
3458 // If we have a move immediate and all successors have been assigned, bias
3459 // towards scheduling this later. Make sure all register defs are to
3460 // physical registers.
3461 bool DoBias = true;
3462 for (const MachineOperand &Op : MI->defs()) {
3463 if (Op.isReg() && !Op.getReg().isPhysical()) {
3464 DoBias = false;
3465 break;
3466 }
3467 }
3468
3469 if (DoBias)
3470 return isTop ? -1 : 1;
3471 }
3472
3473 return 0;
3474}
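// [Illustrative note, editorial; not part of the LLVM source.] Bottom-up
// (isTop == false), operand 0 is the already-scheduled side of a COPY: a
// physical destination returns 1 so the copy issues right next to its
// consumer. If instead the (unscheduled) source is physical, the copy is
// deferred (-1) while it sits at the region boundary, otherwise scheduled
// immediately (1) to free its dependent.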
3475} // end namespace llvm
3476
3477 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3478 bool AtTop,
3479 const RegPressureTracker &RPTracker,
3480 RegPressureTracker &TempTracker) {
3481 Cand.SU = SU;
3482 Cand.AtTop = AtTop;
3483 if (DAG->isTrackingPressure()) {
3484 if (AtTop) {
3485 TempTracker.getMaxDownwardPressureDelta(
3486 Cand.SU->getInstr(),
3487 Cand.RPDelta,
3488 DAG->getRegionCriticalPSets(),
3489 DAG->getRegPressure().MaxSetPressure);
3490 } else {
3491 if (VerifyScheduling) {
3492 TempTracker.getMaxUpwardPressureDelta(
3493 Cand.SU->getInstr(),
3494 &DAG->getPressureDiff(Cand.SU),
3495 Cand.RPDelta,
3496 DAG->getRegionCriticalPSets(),
3497 DAG->getRegPressure().MaxSetPressure);
3498 } else {
3499 RPTracker.getUpwardPressureDelta(
3500 Cand.SU->getInstr(),
3501 DAG->getPressureDiff(Cand.SU),
3502 Cand.RPDelta,
3503 DAG->getRegionCriticalPSets(),
3504 DAG->getRegPressure().MaxSetPressure);
3505 }
3506 }
3507 }
3508 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3509 << " Try SU(" << Cand.SU->NodeNum << ") "
3510             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3511             << Cand.RPDelta.Excess.getUnitInc() << "\n");
3512}
3513
3514/// Apply a set of heuristics to a new candidate. Heuristics are currently
3515/// hierarchical. This may be more efficient than a graduated cost model because
3516/// we don't need to evaluate all aspects of the model for each node in the
3517/// queue. But it's really done to make the heuristics easier to debug and
3518/// statistically analyze.
3519///
3520/// \param Cand provides the policy and current best candidate.
3521/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3522/// \param Zone describes the scheduled zone that we are extending, or nullptr
3523/// if Cand is from a different zone than TryCand.
3524/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3525bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3526                                    SchedCandidate &TryCand,
3527 SchedBoundary *Zone) const {
3528 // Initialize the candidate if needed.
3529 if (!Cand.isValid()) {
3530 TryCand.Reason = NodeOrder;
3531 return true;
3532 }
3533
3534  // Bias physreg defs and copies toward their uses and definitions, respectively.
3535 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3536 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3537 return TryCand.Reason != NoCand;
3538
3539 // Avoid exceeding the target's limit.
3540 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3541 Cand.RPDelta.Excess,
3542 TryCand, Cand, RegExcess, TRI,
3543 DAG->MF))
3544 return TryCand.Reason != NoCand;
3545
3546 // Avoid increasing the max critical pressure in the scheduled region.
3547 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3548 Cand.RPDelta.CriticalMax,
3549 TryCand, Cand, RegCritical, TRI,
3550 DAG->MF))
3551 return TryCand.Reason != NoCand;
3552
3553  // We only compare a subset of features when comparing nodes between the
3554  // Top and Bottom boundaries. Some properties are simply incomparable; in
3555  // many other instances we should only override the other boundary if
3556  // something is clearly a good pick on one boundary. Skip heuristics that
3557  // are more "tie-breaking" in nature.
3558 bool SameBoundary = Zone != nullptr;
3559 if (SameBoundary) {
3560    // For loops that are acyclic path limited, aggressively schedule for
3561    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3562 // heuristics to take precedence.
3563 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3564 tryLatency(TryCand, Cand, *Zone))
3565 return TryCand.Reason != NoCand;
3566
3567 // Prioritize instructions that read unbuffered resources by stall cycles.
3568 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3569 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3570 return TryCand.Reason != NoCand;
3571 }
3572
3573 // Keep clustered nodes together to encourage downstream peephole
3574 // optimizations which may reduce resource requirements.
3575 //
3576 // This is a best effort to set things up for a post-RA pass. Optimizations
3577 // like generating loads of multiple registers should ideally be done within
3578 // the scheduler pass by combining the loads during DAG postprocessing.
3579 const SUnit *CandNextClusterSU =
3580 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3581 const SUnit *TryCandNextClusterSU =
3582 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3583 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3584 Cand.SU == CandNextClusterSU,
3585 TryCand, Cand, Cluster))
3586 return TryCand.Reason != NoCand;
3587
3588 if (SameBoundary) {
3589 // Weak edges are for clustering and other constraints.
3590 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3591 getWeakLeft(Cand.SU, Cand.AtTop),
3592 TryCand, Cand, Weak))
3593 return TryCand.Reason != NoCand;
3594 }
3595
3596 // Avoid increasing the max pressure of the entire region.
3597 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3598 Cand.RPDelta.CurrentMax,
3599 TryCand, Cand, RegMax, TRI,
3600 DAG->MF))
3601 return TryCand.Reason != NoCand;
3602
3603 if (SameBoundary) {
3604 // Avoid critical resource consumption and balance the schedule.
3605 TryCand.initResourceDelta(DAG, SchedModel);
3606    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3607                TryCand, Cand, ResourceReduce))
3608 return TryCand.Reason != NoCand;
3609    if (tryGreater(TryCand.ResDelta.DemandedResources,
3610                   Cand.ResDelta.DemandedResources,
3611                   TryCand, Cand, ResourceDemand))
3612 return TryCand.Reason != NoCand;
3613
3614 // Avoid serializing long latency dependence chains.
3615 // For acyclic path limited loops, latency was already checked above.
3616 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3617 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3618 return TryCand.Reason != NoCand;
3619
3620 // Fall through to original instruction order.
3621 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3622 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3623 TryCand.Reason = NodeOrder;
3624 return true;
3625 }
3626 }
3627
3628 return false;
3629}
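// Editor's note: a sketch of the comparison pattern used above; not part of
// the upstream file. Each heuristic is one rung of a ladder: the first
// tryLess()/tryGreater() call that can tell the candidates apart records a
// Reason and short-circuits the rest, so high-priority tests (physreg bias,
// excess pressure) are consulted before the cheaper tie-breakers:
//
//   if (tryLess(TryVal, CandVal, TryCand, Cand, SomeReason))
//     return TryCand.Reason != NoCand; // decided either way; stop comparing.
//
// Falling off the end returns false, meaning no heuristic preferred TryCand,
// and pickNodeFromQueue() keeps the incumbent candidate.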
3630
3631/// Pick the best candidate from the queue.
3632///
3633/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3634/// DAG building. To adjust for the current scheduling location we need to
3635/// maintain the number of vreg uses remaining to be top-scheduled.
3636void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3637                                         const CandPolicy &ZonePolicy,
3638 const RegPressureTracker &RPTracker,
3639 SchedCandidate &Cand) {
3640 // getMaxPressureDelta temporarily modifies the tracker.
3641 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3642
3643 ReadyQueue &Q = Zone.Available;
3644 for (SUnit *SU : Q) {
3645
3646 SchedCandidate TryCand(ZonePolicy);
3647 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3648 // Pass SchedBoundary only when comparing nodes from the same boundary.
3649 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3650 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3651 // Initialize resource delta if needed in case future heuristics query it.
3652 if (TryCand.ResDelta == SchedResourceDelta())
3653 TryCand.initResourceDelta(DAG, SchedModel);
3654 Cand.setBest(TryCand);
3655      LLVM_DEBUG(traceCandidate(Cand));
3656    }
3657 }
3658}
3659
3660/// Pick the best candidate node from either the top or bottom queue.
3661SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3662  // Schedule as far as possible in the direction of no choice. This is most
3663 // efficient, but also provides the best heuristics for CriticalPSets.
3664 if (SUnit *SU = Bot.pickOnlyChoice()) {
3665 IsTopNode = false;
3666 tracePick(Only1, false);
3667 return SU;
3668 }
3669 if (SUnit *SU = Top.pickOnlyChoice()) {
3670 IsTopNode = true;
3671 tracePick(Only1, true);
3672 return SU;
3673 }
3674 // Set the bottom-up policy based on the state of the current bottom zone and
3675 // the instructions outside the zone, including the top zone.
3676 CandPolicy BotPolicy;
3677 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3678 // Set the top-down policy based on the state of the current top zone and
3679 // the instructions outside the zone, including the bottom zone.
3680 CandPolicy TopPolicy;
3681 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3682
3683 // See if BotCand is still valid (because we previously scheduled from Top).
3684 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3685 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3686 BotCand.Policy != BotPolicy) {
3687 BotCand.reset(CandPolicy());
3688 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3689 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3690 } else {
3691 LLVM_DEBUG(traceCandidate(BotCand));
3692#ifndef NDEBUG
3693 if (VerifyScheduling) {
3694 SchedCandidate TCand;
3695 TCand.reset(CandPolicy());
3696 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3697 assert(TCand.SU == BotCand.SU &&
3698 "Last pick result should correspond to re-picking right now");
3699 }
3700#endif
3701 }
3702
3703 // Check if the top Q has a better candidate.
3704 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3705 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3706 TopCand.Policy != TopPolicy) {
3707 TopCand.reset(CandPolicy());
3708 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3709 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3710 } else {
3711 LLVM_DEBUG(traceCandidate(TopCand));
3712#ifndef NDEBUG
3713 if (VerifyScheduling) {
3714 SchedCandidate TCand;
3715 TCand.reset(CandPolicy());
3716 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3717 assert(TCand.SU == TopCand.SU &&
3718 "Last pick result should correspond to re-picking right now");
3719 }
3720#endif
3721 }
3722
3723 // Pick best from BotCand and TopCand.
3724 assert(BotCand.isValid());
3725 assert(TopCand.isValid());
3726 SchedCandidate Cand = BotCand;
3727 TopCand.Reason = NoCand;
3728 if (tryCandidate(Cand, TopCand, nullptr)) {
3729 Cand.setBest(TopCand);
3730    LLVM_DEBUG(traceCandidate(Cand));
3731  }
3732
3733 IsTopNode = Cand.AtTop;
3734 tracePick(Cand);
3735 return Cand.SU;
3736}
3737
3738/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3739SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3740  if (DAG->top() == DAG->bottom()) {
3741 assert(Top.Available.empty() && Top.Pending.empty() &&
3742 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3743 return nullptr;
3744 }
3745 SUnit *SU;
3746 do {
3747 if (RegionPolicy.OnlyTopDown) {
3748 SU = Top.pickOnlyChoice();
3749 if (!SU) {
3750 CandPolicy NoPolicy;
3751 TopCand.reset(NoPolicy);
3752 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3753 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3754 tracePick(TopCand);
3755 SU = TopCand.SU;
3756 }
3757 IsTopNode = true;
3758 } else if (RegionPolicy.OnlyBottomUp) {
3759 SU = Bot.pickOnlyChoice();
3760 if (!SU) {
3761 CandPolicy NoPolicy;
3762 BotCand.reset(NoPolicy);
3763 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3764 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3765 tracePick(BotCand);
3766 SU = BotCand.SU;
3767 }
3768 IsTopNode = false;
3769 } else {
3770 SU = pickNodeBidirectional(IsTopNode);
3771 }
3772 } while (SU->isScheduled);
3773
3774 if (SU->isTopReady())
3775 Top.removeReady(SU);
3776 if (SU->isBottomReady())
3777 Bot.removeReady(SU);
3778
3779 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3780 << *SU->getInstr());
3781 return SU;
3782}
3783
3784void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3785  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3786 if (!isTop)
3787 ++InsertPos;
3788 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3789
3790 // Find already scheduled copies with a single physreg dependence and move
3791 // them just above the scheduled instruction.
3792 for (SDep &Dep : Deps) {
3793 if (Dep.getKind() != SDep::Data ||
3794 !Register::isPhysicalRegister(Dep.getReg()))
3795 continue;
3796 SUnit *DepSU = Dep.getSUnit();
3797 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3798 continue;
3799 MachineInstr *Copy = DepSU->getInstr();
3800 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3801 continue;
3802 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3803 DAG->dumpNode(*Dep.getSUnit()));
3804 DAG->moveInstruction(Copy, InsertPos);
3805 }
3806}
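// Editor's note: an illustrative before/after; not part of the upstream file.
// Suppose a just-scheduled top-boundary instruction SU reads $physreg0, which
// is defined by an already-placed copy whose only data successor is SU:
//
//   $physreg0 = COPY %0        // DepSU: isCopy(), Succs.size() == 1
//   ...unrelated scheduled code...
//   <SU uses $physreg0>        // InsertPos
//
// moveInstruction() hoists the COPY to InsertPos, so $physreg0 is live only
// across the adjacent pair, which is the layout regalloc wants (see the
// comment on biasPhysReg above).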
3807
3808/// Update the scheduler's state after scheduling a node. This is the same node
3809/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3810/// update its state based on the current cycle before MachineSchedStrategy
3811/// does.
3812///
3813/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3814/// them here. See comments in biasPhysReg.
3815void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3816 if (IsTopNode) {
3817 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3818 Top.bumpNode(SU);
3819 if (SU->hasPhysRegUses)
3820 reschedulePhysReg(SU, true);
3821 } else {
3822 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3823 Bot.bumpNode(SU);
3824 if (SU->hasPhysRegDefs)
3825 reschedulePhysReg(SU, false);
3826 }
3827}
3828
3829/// Create the standard converging machine scheduler. This will be used as the
3830/// default scheduler if the target does not set a default.
3831ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3832  ScheduleDAGMILive *DAG =
3833 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3834 // Register DAG post-processors.
3835 //
3836 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3837 // data and pass it to later mutations. Have a single mutation that gathers
3838 // the interesting nodes in one pass.
3839  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3840
3841 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3842 // Add MacroFusion mutation if fusions are not empty.
3843 const auto &MacroFusions = STI.getMacroFusions();
3844 if (!MacroFusions.empty())
3845 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3846 return DAG;
3847}
3848
3849static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3850  return createGenericSchedLive(C);
3851}
3852
3853static MachineSchedRegistry
3854GenericSchedRegistry("converge", "Standard converging scheduler.",
3855                     createConvergingSched);
3856
3857//===----------------------------------------------------------------------===//
3858// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3859//===----------------------------------------------------------------------===//
3860
3861void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3862  DAG = Dag;
3863 SchedModel = DAG->getSchedModel();
3864 TRI = DAG->TRI;
3865
3866 Rem.init(DAG, SchedModel);
3867 Top.init(DAG, SchedModel, &Rem);
3868 Bot.init(DAG, SchedModel, &Rem);
3869
3870 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3871 // or are disabled, then these HazardRecs will be disabled.
3872  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3873  if (!Top.HazardRec) {
3874 Top.HazardRec =
3875        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3876            Itin, DAG);
3877 }
3878 if (!Bot.HazardRec) {
3879 Bot.HazardRec =
3880        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3881            Itin, DAG);
3882 }
3883}
3884
3885void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3886                                      MachineBasicBlock::iterator End,
3887                                      unsigned NumRegionInstrs) {
3888  if (PostRASchedDirection == MISchedPostRASched::TopDown) {
3889    RegionPolicy.OnlyTopDown = true;
3890 RegionPolicy.OnlyBottomUp = false;
3891  } else if (PostRASchedDirection == MISchedPostRASched::BottomUp) {
3892    RegionPolicy.OnlyTopDown = false;
3893 RegionPolicy.OnlyBottomUp = true;
3894 }
3895}
3896
3897void PostGenericScheduler::registerRoots() {
3898  Rem.CriticalPath = DAG->ExitSU.getDepth();
3899
3900  // Some roots may not feed into ExitSU. Check all of them just in case.
3901 for (const SUnit *SU : Bot.Available) {
3902 if (SU->getDepth() > Rem.CriticalPath)
3903 Rem.CriticalPath = SU->getDepth();
3904 }
3905 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3906  if (DumpCriticalPathLength) {
3907    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3908 }
3909}
3910
3911/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3912///
3913/// \param Cand provides the policy and current best candidate.
3914/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3915/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3916bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3917                                        SchedCandidate &TryCand) {
3918 // Initialize the candidate if needed.
3919 if (!Cand.isValid()) {
3920 TryCand.Reason = NodeOrder;
3921 return true;
3922 }
3923
3924 // Prioritize instructions that read unbuffered resources by stall cycles.
3925 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3926 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3927 return TryCand.Reason != NoCand;
3928
3929 // Keep clustered nodes together.
3930 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3931 Cand.SU == DAG->getNextClusterSucc(),
3932 TryCand, Cand, Cluster))
3933 return TryCand.Reason != NoCand;
3934
3935 // Avoid critical resource consumption and balance the schedule.
3936  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3937              TryCand, Cand, ResourceReduce))
3938 return TryCand.Reason != NoCand;
3939  if (tryGreater(TryCand.ResDelta.DemandedResources,
3940                 Cand.ResDelta.DemandedResources,
3941                 TryCand, Cand, ResourceDemand))
3942 return TryCand.Reason != NoCand;
3943
3944 // Avoid serializing long latency dependence chains.
3945 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3946 return TryCand.Reason != NoCand;
3947 }
3948
3949 // Fall through to original instruction order.
3950 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3951 TryCand.Reason = NodeOrder;
3952 return true;
3953 }
3954
3955 return false;
3956}
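// Editor's note (not part of the upstream file): this is the same ladder as
// GenericScheduler::tryCandidate() with everything that needs liveness
// removed; after regalloc there is no physreg bias, no weak-edge rung, and no
// register-pressure rungs, leaving stalls, clustering, resources, latency,
// and source order.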
3957
3958void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3959                                             SchedCandidate &Cand) {
3960 ReadyQueue &Q = Zone.Available;
3961 for (SUnit *SU : Q) {
3962 SchedCandidate TryCand(Cand.Policy);
3963 TryCand.SU = SU;
3964 TryCand.AtTop = Zone.isTop();
3965 TryCand.initResourceDelta(DAG, SchedModel);
3966 if (tryCandidate(Cand, TryCand)) {
3967 Cand.setBest(TryCand);
3968      LLVM_DEBUG(traceCandidate(Cand));
3969    }
3970 }
3971}
3972
3973/// Pick the next node to schedule.
3974SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3975  if (DAG->top() == DAG->bottom()) {
3976 assert(Top.Available.empty() && Top.Pending.empty() &&
3977 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3978 return nullptr;
3979 }
3980 SUnit *SU;
3981 do {
3982 if (RegionPolicy.OnlyBottomUp) {
3983 assert(!RegionPolicy.OnlyTopDown);
3984 SU = Bot.pickOnlyChoice();
3985 if (SU) {
3986        tracePick(Only1, false);
3987 } else {
3988 CandPolicy NoPolicy;
3989 SchedCandidate BotCand(NoPolicy);
3990 // Set the bottom-up policy based on the state of the current bottom
3991 // zone and the instructions outside the zone, including the top zone.
3992 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
3993 pickNodeFromQueue(Bot, BotCand);
3994 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3995 tracePick(BotCand);
3996 SU = BotCand.SU;
3997 }
3998 IsTopNode = false;
3999 } else {
4000
4001 assert(RegionPolicy.OnlyTopDown);
4002 SU = Top.pickOnlyChoice();
4003 if (SU) {
4004 tracePick(Only1, true);
4005 } else {
4006 CandPolicy NoPolicy;
4007 SchedCandidate TopCand(NoPolicy);
4008 // Set the top-down policy based on the state of the current top zone
4009 // and the instructions outside the zone, including the bottom zone.
4010 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4011 pickNodeFromQueue(Top, TopCand);
4012 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4013 tracePick(TopCand);
4014 SU = TopCand.SU;
4015 }
4016 IsTopNode = true;
4017 }
4018 } while (SU->isScheduled);
4019
4020 if (SU->isTopReady())
4021 Top.removeReady(SU);
4022 if (SU->isBottomReady())
4023 Bot.removeReady(SU);
4024
4025 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4026 << *SU->getInstr());
4027 return SU;
4028}
4029
4030/// Called after ScheduleDAGMI has scheduled an instruction and updated
4031/// scheduled/remaining flags in the DAG nodes.
4032void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4033 if (IsTopNode) {
4034 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4035 Top.bumpNode(SU);
4036 } else {
4037 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4038 Bot.bumpNode(SU);
4039 }
4040}
4041
4042ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4043  ScheduleDAGMI *DAG =
4044 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4045 /*RemoveKillFlags=*/true);
4046 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4047 // Add MacroFusion mutation if fusions are not empty.
4048 const auto &MacroFusions = STI.getMacroFusions();
4049 if (!MacroFusions.empty())
4050 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4051 return DAG;
4052}
4053
4054//===----------------------------------------------------------------------===//
4055// ILP Scheduler. Currently for experimental analysis of heuristics.
4056//===----------------------------------------------------------------------===//
4057
4058namespace {
4059
4060/// Order nodes by the ILP metric.
4061struct ILPOrder {
4062 const SchedDFSResult *DFSResult = nullptr;
4063 const BitVector *ScheduledTrees = nullptr;
4064 bool MaximizeILP;
4065
4066 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4067
4068 /// Apply a less-than relation on node priority.
4069 ///
4070 /// (Return true if A comes after B in the Q.)
4071 bool operator()(const SUnit *A, const SUnit *B) const {
4072 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4073 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4074 if (SchedTreeA != SchedTreeB) {
4075 // Unscheduled trees have lower priority.
4076 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4077 return ScheduledTrees->test(SchedTreeB);
4078
4079 // Trees with shallower connections have lower priority.
4080 if (DFSResult->getSubtreeLevel(SchedTreeA)
4081 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4082 return DFSResult->getSubtreeLevel(SchedTreeA)
4083 < DFSResult->getSubtreeLevel(SchedTreeB);
4084 }
4085 }
4086 if (MaximizeILP)
4087 return DFSResult->getILP(A) < DFSResult->getILP(B);
4088 else
4089 return DFSResult->getILP(A) > DFSResult->getILP(B);
4090 }
4091};
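// Editor's note: a worked comparison; not part of the upstream file. With
// MaximizeILP == true and nodes A, B in the same subtree where
// DFSResult->getILP(A) == 3 and DFSResult->getILP(B) == 5, operator()(A, B)
// is true ("A comes after B"), so the heap surfaces B, the higher-ILP node,
// first. Across subtrees, nodes of an already-scheduled subtree always win,
// which keeps each subtree's instructions contiguous in the final schedule.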
4092
4093/// Schedule based on the ILP metric.
4094class ILPScheduler : public MachineSchedStrategy {
4095 ScheduleDAGMILive *DAG = nullptr;
4096 ILPOrder Cmp;
4097
4098 std::vector<SUnit*> ReadyQ;
4099
4100public:
4101 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4102
4103 void initialize(ScheduleDAGMI *dag) override {
4104 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4105 DAG = static_cast<ScheduleDAGMILive*>(dag);
4106 DAG->computeDFSResult();
4107 Cmp.DFSResult = DAG->getDFSResult();
4108 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4109 ReadyQ.clear();
4110 }
4111
4112 void registerRoots() override {
4113 // Restore the heap in ReadyQ with the updated DFS results.
4114 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4115 }
4116
4117 /// Implement MachineSchedStrategy interface.
4118 /// -----------------------------------------
4119
4120 /// Callback to select the highest priority node from the ready Q.
4121 SUnit *pickNode(bool &IsTopNode) override {
4122 if (ReadyQ.empty()) return nullptr;
4123 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4124 SUnit *SU = ReadyQ.back();
4125 ReadyQ.pop_back();
4126 IsTopNode = false;
4127 LLVM_DEBUG(dbgs() << "Pick node "
4128 << "SU(" << SU->NodeNum << ") "
4129 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4130 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4131 << " @"
4132 << DAG->getDFSResult()->getSubtreeLevel(
4133 DAG->getDFSResult()->getSubtreeID(SU))
4134 << '\n'
4135 << "Scheduling " << *SU->getInstr());
4136 return SU;
4137 }
4138
4139 /// Scheduler callback to notify that a new subtree is scheduled.
4140 void scheduleTree(unsigned SubtreeID) override {
4141 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4142 }
4143
4144 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4145 /// DFSResults, and resort the priority Q.
4146 void schedNode(SUnit *SU, bool IsTopNode) override {
4147 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4148 }
4149
4150 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4151
4152 void releaseBottomNode(SUnit *SU) override {
4153 ReadyQ.push_back(SU);
4154 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4155 }
4156};
4157
4158} // end anonymous namespace
4159
4160static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4161  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4162}
4163static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4164  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4165}
4166
4168 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4170 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4171
4172//===----------------------------------------------------------------------===//
4173// Machine Instruction Shuffler for Correctness Testing
4174//===----------------------------------------------------------------------===//
4175
4176#ifndef NDEBUG
4177namespace {
4178
4179/// Apply a less-than relation on the node order, which corresponds to the
4180/// instruction order prior to scheduling. IsReverse implements greater-than.
4181template<bool IsReverse>
4182struct SUnitOrder {
4183 bool operator()(SUnit *A, SUnit *B) const {
4184 if (IsReverse)
4185 return A->NodeNum > B->NodeNum;
4186 else
4187 return A->NodeNum < B->NodeNum;
4188 }
4189};
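// Editor's note: the two instantiations spelled out; not part of the upstream
// file:
//
//   SUnitOrder<false>()(A, B); // A->NodeNum < B->NodeNum: a less-than heap
//                              // comparator, so the highest-numbered (latest)
//                              // node is popped first.
//   SUnitOrder<true>()(A, B);  // A->NodeNum > B->NodeNum: the reverse, used
//                              // for bottom-up picking.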
4190
4191/// Reorder instructions as much as possible.
4192class InstructionShuffler : public MachineSchedStrategy {
4193 bool IsAlternating;
4194 bool IsTopDown;
4195
4196  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4197  // gives nodes with a higher number higher priority, causing the latest
4198  // instructions to be scheduled first.
4199  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4200    TopQ;
4201
4202 // When scheduling bottom-up, use greater-than as the queue priority.
4203  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4204    BottomQ;
4205
4206public:
4207 InstructionShuffler(bool alternate, bool topdown)
4208 : IsAlternating(alternate), IsTopDown(topdown) {}
4209
4210 void initialize(ScheduleDAGMI*) override {
4211 TopQ.clear();
4212 BottomQ.clear();
4213 }
4214
4215 /// Implement MachineSchedStrategy interface.
4216 /// -----------------------------------------
4217
4218 SUnit *pickNode(bool &IsTopNode) override {
4219 SUnit *SU;
4220 if (IsTopDown) {
4221 do {
4222 if (TopQ.empty()) return nullptr;
4223 SU = TopQ.top();
4224 TopQ.pop();
4225 } while (SU->isScheduled);
4226 IsTopNode = true;
4227 } else {
4228 do {
4229 if (BottomQ.empty()) return nullptr;
4230 SU = BottomQ.top();
4231 BottomQ.pop();
4232 } while (SU->isScheduled);
4233 IsTopNode = false;
4234 }
4235 if (IsAlternating)
4236 IsTopDown = !IsTopDown;
4237 return SU;
4238 }
4239
4240 void schedNode(SUnit *SU, bool IsTopNode) override {}
4241
4242 void releaseTopNode(SUnit *SU) override {
4243 TopQ.push(SU);
4244 }
4245 void releaseBottomNode(SUnit *SU) override {
4246 BottomQ.push(SU);
4247 }
4248};
4249
4250} // end anonymous namespace
4251
4252static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4253  bool Alternate = !ForceTopDown && !ForceBottomUp;
4254 bool TopDown = !ForceBottomUp;
4255 assert((TopDown || !ForceTopDown) &&
4256 "-misched-topdown incompatible with -misched-bottomup");
4257 return new ScheduleDAGMILive(
4258 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4259}
4260
4262 "shuffle", "Shuffle machine instructions alternating directions",
4264#endif // !NDEBUG
4265
4266//===----------------------------------------------------------------------===//
4267// GraphWriter support for ScheduleDAGMILive.
4268//===----------------------------------------------------------------------===//
4269
4270#ifndef NDEBUG
4271namespace llvm {
4272
4273template<> struct GraphTraits<
4274  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
4275
4276template<>
4277struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
4278  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4279
4280 static std::string getGraphName(const ScheduleDAG *G) {
4281 return std::string(G->MF.getName());
4282 }
4283
4284  static bool renderGraphFromBottomUp() {
4285    return true;
4286 }
4287
4288 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4289 if (ViewMISchedCutoff == 0)
4290 return false;
4291 return (Node->Preds.size() > ViewMISchedCutoff
4292 || Node->Succs.size() > ViewMISchedCutoff);
4293 }
4294
4295 /// If you want to override the dot attributes printed for a particular
4296 /// edge, override this method.
4297 static std::string getEdgeAttributes(const SUnit *Node,
4298 SUnitIterator EI,
4299 const ScheduleDAG *Graph) {
4300 if (EI.isArtificialDep())
4301 return "color=cyan,style=dashed";
4302 if (EI.isCtrlDep())
4303 return "color=blue,style=dashed";
4304 return "";
4305 }
4306
4307 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4308 std::string Str;
4309 raw_string_ostream SS(Str);
4310 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4311 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4312 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4313 SS << "SU:" << SU->NodeNum;
4314 if (DFS)
4315 SS << " I:" << DFS->getNumInstrs(SU);
4316 return SS.str();
4317 }
4318
4319 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4320 return G->getGraphNodeLabel(SU);
4321 }
4322
4323 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4324 std::string Str("shape=Mrecord");
4325 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4326 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4327 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4328 if (DFS) {
4329 Str += ",style=filled,fillcolor=\"#";
4330 Str += DOT::getColorString(DFS->getSubtreeID(N));
4331 Str += '"';
4332 }
4333 return Str;
4334 }
4335};
4336
4337} // end namespace llvm
4338#endif // NDEBUG
4339
4340/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4341/// rendered using 'dot'.
4342void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4343#ifndef NDEBUG
4344 ViewGraph(this, Name, false, Title);
4345#else
4346 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4347 << "systems with Graphviz or gv!\n";
4348#endif // NDEBUG
4349}
4350
4351/// Out-of-line implementation with no arguments is handy for gdb.
4352void ScheduleDAGMI::viewGraph() {
4353  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4354}
4355
4356/// Sort predicate for the intervals stored in an instance of
4357/// ResourceSegments. Intervals are always disjoint (no intersection
4358/// for any pair of intervals), so we can sort the totality of
4359/// the intervals by looking only at the left boundary.
4360static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4361                          const ResourceSegments::IntervalTy &B) {
4362  return A.first < B.first;
4363}
4364
4365unsigned ResourceSegments::getFirstAvailableAt(
4366 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4367 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4368 IntervalBuilder) const {
4369 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4370 sortIntervals) &&
4371 "Cannot execute on an un-sorted set of intervals.");
4372
4373  // Zero resource usage is allowed by TargetSchedule.td, but we do not
4374  // construct a ResourceSegment interval for that situation.
4375 if (AcquireAtCycle == ReleaseAtCycle)
4376 return CurrCycle;
4377
4378 unsigned RetCycle = CurrCycle;
4379 ResourceSegments::IntervalTy NewInterval =
4380 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4381 for (auto &Interval : _Intervals) {
4382 if (!intersects(NewInterval, Interval))
4383 continue;
4384
4385    // Move the interval so that it starts right past the end of the
4386    // interval it intersects with.
4387 assert(Interval.second > NewInterval.first &&
4388 "Invalid intervals configuration.");
4389 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4390 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4391 }
4392 return RetCycle;
4393}
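// Editor's note: a worked walkthrough; not part of the upstream file. Assume
// _Intervals = {[2,4), [4,7)}, CurrCycle = 2, AcquireAtCycle = 0,
// ReleaseAtCycle = 1, and a top-down IntervalBuilder yielding [C, C+1):
//   - [2,3) intersects [2,4): RetCycle += 4 - 2, giving 4.
//   - the rebuilt [4,5) intersects [4,7): RetCycle += 7 - 4, giving 7.
//   - [7,8) clears every stored segment, so 7 is returned.
// One left-to-right pass suffices because the list is sorted and each bump
// moves the candidate past the segment it collided with.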
4394
4395void ResourceSegments::add(ResourceSegments::IntervalTy A,
4396                           const unsigned CutOff) {
4397 assert(A.first <= A.second && "Cannot add negative resource usage");
4398 assert(CutOff > 0 && "0-size interval history has no use.");
4399 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4400 // instruction needed the resource to be available but does not use it.
4401 // However, ResourceSegment represents an interval that is closed on the left
4402 // and open on the right. It is impossible to represent an empty interval when
4403 // the left is closed. Do not add it to Intervals.
4404 if (A.first == A.second)
4405 return;
4406
4407 assert(all_of(_Intervals,
4408 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4409 return !intersects(A, Interval);
4410 }) &&
4411 "A resource is being overwritten");
4412 _Intervals.push_back(A);
4413
4414 sortAndMerge();
4415
4416 // Do not keep the full history of the intervals, just the
4417 // latest #CutOff.
4418 while (_Intervals.size() > CutOff)
4419 _Intervals.pop_front();
4420}
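// Editor's note: a worked example; not part of the upstream file. Starting
// from _Intervals = {[2,4), [4,7)}, add({7,9}, /*CutOff=*/2) appends [7,9),
// then sortAndMerge() coalesces the touching segments ([2,4) + [4,7) becomes
// [2,7), then [2,7) + [7,9) becomes [2,9)), leaving a single interval, so the
// eviction loop has nothing to drop. Only when more than CutOff disjoint
// segments survive merging does the loop discard the oldest (leftmost) ones.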
4421
4422bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4423                                  ResourceSegments::IntervalTy B) {
4424  assert(A.first <= A.second && "Invalid interval");
4425 assert(B.first <= B.second && "Invalid interval");
4426
4427 // Share one boundary.
4428 if ((A.first == B.first) || (A.second == B.second))
4429 return true;
4430
4431  // full intersect:  [   ***   ) B
4432 // [***) A
4433 if ((A.first > B.first) && (A.second < B.second))
4434 return true;
4435
4436 // right intersect: [ ***) B
4437 // [*** ) A
4438 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4439 return true;
4440
4441 // left intersect: [*** ) B
4442 // [ ***) A
4443 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4444 return true;
4445
4446 return false;
4447}
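// Editor's note: spot checks; not part of the upstream file. For these
// half-open intervals, intersects({2,4}, {3,6}) is true via the "left
// intersect" case (2 < 3, 3 < 4, 6 > 3). intersects({1,8}, {3,6}), i.e. A
// strictly containing B, is caught by the same case, because its third clause
// (B.second > B.first) holds for any non-empty B. Adjacent segments such as
// {2,4} and {4,6} share no identical boundary and fail every test, so they
// correctly do not intersect.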
4448
4449void ResourceSegments::sortAndMerge() {
4450 if (_Intervals.size() <= 1)
4451 return;
4452
4453 // First sort the collection.
4454 _Intervals.sort(sortIntervals);
4455
4456  // We can use std::next here because the list has at least two elements.
4457 auto next = std::next(std::begin(_Intervals));
4458 auto E = std::end(_Intervals);
4459 for (; next != E; ++next) {
4460 if (std::prev(next)->second >= next->first) {
4461 next->first = std::prev(next)->first;
4462 _Intervals.erase(std::prev(next));
4463 continue;
4464 }
4465 }
4466}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
#define P(N)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:36
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
bool isArtificialDep() const
Definition: ScheduleDAG.h:668
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:665
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has no successors.
Definition: ScheduleDAG.h:406
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:270
bool isBottomReady() const
Definition: ScheduleDAG.h:449
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
bool isTopReady() const
Definition: ScheduleDAG.h:446
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
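getDepth and getHeight compose naturally: their sum is the length of the longest chain through a node. A small hedged sketch (CriticalPathLen is a hypothetical input, e.g. taken from the DAG's critical-path computation):

#include "llvm/CodeGen/ScheduleDAG.h"

// A node lies on a critical chain when the longest path reaching it plus the
// longest path leaving it covers the whole critical path length.
static bool onCriticalPath(const llvm::SUnit &SU, unsigned CriticalPathLen) {
  return SU.getDepth() + SU.getHeight() >= CriticalPathLen;
}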
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execute.
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
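The SchedBoundary members above are typically driven in a pick/bump rhythm by a scheduling strategy. A hedged sketch of the top-down shape of that loop (pickNodeFromQueue stands in for a real heuristic and is hypothetical):

#include "llvm/CodeGen/MachineScheduler.h"

llvm::SUnit *pickNodeFromQueue(llvm::SchedBoundary &Zone); // hypothetical heuristic

llvm::SUnit *pickAndBumpTop(llvm::SchedBoundary &Top) {
  // Fast path: with a single available candidate, no heuristic is needed.
  llvm::SUnit *SU = Top.pickOnlyChoice();
  if (!SU)
    SU = pickNodeFromQueue(Top); // fall back to real heuristics
  if (SU) {
    Top.removeReady(SU); // take it out of this boundary's ready set
    Top.bumpNode(SU);    // move the scheduled-code boundary past SU
  }
  return SU;
}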
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
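One consumer of these DFS metrics is ILP-oriented candidate ordering. A hedged sketch, assuming the result was populated via computeDFSResult() before any comparisons:

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"

// ILPValue provides operator<, comparing instruction count per path length,
// so candidates can be ordered by how much parallelism they expose.
static bool hasLowerILP(const llvm::SchedDFSResult &DFS, const llvm::SUnit *A,
                        const llvm::SUnit *B) {
  return DFS.getILP(A) < DFS.getILP(B);
}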
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
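canAddEdge/addEdge are the usual workhorses of a ScheduleDAGMutation. A hedged sketch of that pattern (pickPartner is a hypothetical pairing helper, not an LLVM API):

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"

struct OrderPairsMutation : llvm::ScheduleDAGMutation {
  llvm::SUnit *pickPartner(llvm::SUnit &SU); // hypothetical helper

  void apply(llvm::ScheduleDAGInstrs *DAG) override {
    for (llvm::SUnit &SU : DAG->SUnits) {
      llvm::SUnit *Succ = pickPartner(SU);
      // Guard with canAddEdge so the artificial edge cannot form a cycle.
      if (Succ && DAG->canAddEdge(Succ, &SU))
        DAG->addEdge(Succ, llvm::SDep(&SU, llvm::SDep::Artificial));
    }
  }
};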
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking register pressure.
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing set ID order.
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
void dumpSchedule() const
Dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region boundaries.
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:563
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot issue in the current cycle, either because of latency or resource conflicts.
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard state.
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot issue in the current cycle, either because of latency or resource conflicts.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
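These callbacks define the full hazard-recognizer contract. A hedged, purely illustrative subclass (the two-cycle call shadow is invented for the example):

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

struct ToyHazardRecognizer : llvm::ScheduleHazardRecognizer {
  unsigned CyclesSinceCall = 2; // illustrative state

  HazardType getHazardType(llvm::SUnit *SU, int /*Stalls*/) override {
    // Pretend a call may not issue within two cycles of the previous call.
    return (SU->isCall && CyclesSinceCall < 2) ? Hazard : NoHazard;
  }
  void EmitInstruction(llvm::SUnit *SU) override {
    if (SU->isCall)
      CyclesSinceCall = 0;
  }
  void AdvanceCycle() override { ++CyclesSinceCall; } // top-down stall
  void RecedeCycle() override { ++CyclesSinceCall; }  // bottom-up stall
  void Reset() override { CyclesSinceCall = 2; }      // new block
};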
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:179
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240
SlotIndexes pass.
Definition: SlotIndexes.h:300
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:267
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other resources.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
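A common use of the write-proc-res iterators and scaling factors above is normalizing an instruction's resource consumption. A hedged sketch using the LLVM 19 entry field names (AcquireAtCycle/ReleaseAtCycle, as in the interval helpers earlier in this index):

#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCSchedule.h"

// Sum the resource cycles consumed by a sched class, scaled per resource so
// the counts are comparable across resources with different unit counts.
static unsigned scaledResourceCycles(const llvm::TargetSchedModel &SM,
                                     const llvm::MCSchedClassDesc *SC) {
  unsigned Total = 0;
  for (llvm::TargetSchedModel::ProcResIter PI = SM.getWriteProcResBegin(SC),
                                           PE = SM.getWriteProcResEnd(SC);
       PI != PE; ++PI)
    Total += SM.getResourceFactor(PI->ProcResourceIdx) *
             (PI->ReleaseAtCycle - PI->AcquireAtCycle);
  return Total;
}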
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
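The enable*/overrideSchedPolicy hooks are how a backend opts in to these passes and tunes them per region. A hedged sketch of the kind of body a target's overrideSchedPolicy override might contain (the threshold is invented):

#include "llvm/CodeGen/MachineScheduler.h"

// Candidate body for a target's overrideSchedPolicy(Policy, NumRegionInstrs).
static void tuneRegionPolicy(llvm::MachineSchedPolicy &Policy,
                             unsigned NumRegionInstrs) {
  // Track register pressure only in larger regions; tiny regions rarely spill.
  Policy.ShouldTrackPressure = NumRegionInstrs > 32; // invented threshold
  // Let the generic scheduler work in both directions.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
}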
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been eliminated).
Definition: LiveInterval.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:90
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1316
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
cl::opt< MISchedPostRASched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISchedPostRASched::TopDown), cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling")))
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit according to the target-specific predicate functions.
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
void initializePostMachineSchedulerPass(PassRegistry &)
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
cl::opt< bool > ViewMISchedDAGs
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store clustering.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
cl::opt< bool > ForceBottomUp
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineSchedulerPass(PassRegistry &)
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to load clustering.
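Targets typically combine createGenericSchedLive with these clustering mutations inside their createMachineScheduler hook. A hedged sketch modeled on the in-tree pattern (createMySched is hypothetical):

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Cluster neighboring loads and stores so they can issue back to back.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}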
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
cl::opt< bool > ForceTopDown
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
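tryLess/tryGreater/tryLatency are designed to chain: each sets TryCand.Reason when it decides the order and reports whether it did. A hedged sketch of that chaining inside a tryCandidate-style comparison (the two criteria shown are illustrative):

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

static bool myTryCandidate(GenericSchedulerBase::SchedCandidate &Cand,
                           GenericSchedulerBase::SchedCandidate &TryCand) {
  // Prefer the candidate leaving fewer weak (e.g. cluster) edges behind.
  if (tryLess(getWeakLeft(TryCand.SU, /*isTop=*/true),
              getWeakLeft(Cand.SU, /*isTop=*/true), TryCand, Cand,
              GenericSchedulerBase::Weak))
    return TryCand.Reason != GenericSchedulerBase::NoCand;
  // Otherwise fall back to the original instruction order.
  if (tryLess(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand,
              GenericSchedulerBase::NodeOrder))
    return TryCand.Reason != GenericSchedulerBase::NoCand;
  return false;
}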
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:63
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.