MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
52#include "llvm/Config/llvm-config.h"
54#include "llvm/MC/LaneBitmask.h"
55#include "llvm/Pass.h"
58#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <limits>
68#include <memory>
69#include <string>
70#include <tuple>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76#define DEBUG_TYPE "machine-scheduler"
77
78STATISTIC(NumInstrsInSourceOrderPreRA,
79 "Number of instructions in source order after pre-RA scheduling");
80STATISTIC(NumInstrsInSourceOrderPostRA,
81 "Number of instructions in source order after post-RA scheduling");
82STATISTIC(NumInstrsScheduledPreRA,
83 "Number of instructions scheduled by pre-RA scheduler");
84STATISTIC(NumInstrsScheduledPostRA,
85 "Number of instructions scheduled by post-RA scheduler");
86STATISTIC(NumClustered, "Number of load/store pairs clustered");
87
88STATISTIC(NumTopPreRA,
89 "Number of scheduling units chosen from top queue pre-RA");
90STATISTIC(NumBotPreRA,
91 "Number of scheduling units chosen from bottom queue pre-RA");
92STATISTIC(NumNoCandPreRA,
93 "Number of scheduling units chosen for NoCand heuristic pre-RA");
94STATISTIC(NumOnly1PreRA,
95 "Number of scheduling units chosen for Only1 heuristic pre-RA");
96STATISTIC(NumPhysRegPreRA,
97 "Number of scheduling units chosen for PhysReg heuristic pre-RA");
98STATISTIC(NumRegExcessPreRA,
99 "Number of scheduling units chosen for RegExcess heuristic pre-RA");
100STATISTIC(NumRegCriticalPreRA,
101 "Number of scheduling units chosen for RegCritical heuristic pre-RA");
102STATISTIC(NumStallPreRA,
103 "Number of scheduling units chosen for Stall heuristic pre-RA");
104STATISTIC(NumClusterPreRA,
105 "Number of scheduling units chosen for Cluster heuristic pre-RA");
106STATISTIC(NumWeakPreRA,
107 "Number of scheduling units chosen for Weak heuristic pre-RA");
108STATISTIC(NumRegMaxPreRA,
109 "Number of scheduling units chosen for RegMax heuristic pre-RA");
110STATISTIC(
111 NumResourceReducePreRA,
112 "Number of scheduling units chosen for ResourceReduce heuristic pre-RA");
113STATISTIC(
114 NumResourceDemandPreRA,
115 "Number of scheduling units chosen for ResourceDemand heuristic pre-RA");
116STATISTIC(
117 NumTopDepthReducePreRA,
118 "Number of scheduling units chosen for TopDepthReduce heuristic pre-RA");
119STATISTIC(
120 NumTopPathReducePreRA,
121 "Number of scheduling units chosen for TopPathReduce heuristic pre-RA");
122STATISTIC(
123 NumBotHeightReducePreRA,
124 "Number of scheduling units chosen for BotHeightReduce heuristic pre-RA");
125STATISTIC(
126 NumBotPathReducePreRA,
127 "Number of scheduling units chosen for BotPathReduce heuristic pre-RA");
128STATISTIC(NumNodeOrderPreRA,
129 "Number of scheduling units chosen for NodeOrder heuristic pre-RA");
130STATISTIC(NumFirstValidPreRA,
131 "Number of scheduling units chosen for FirstValid heuristic pre-RA");
132
133STATISTIC(NumTopPostRA,
134 "Number of scheduling units chosen from top queue post-RA");
135STATISTIC(NumBotPostRA,
136 "Number of scheduling units chosen from bottom queue post-RA");
137STATISTIC(NumNoCandPostRA,
138 "Number of scheduling units chosen for NoCand heuristic post-RA");
139STATISTIC(NumOnly1PostRA,
140 "Number of scheduling units chosen for Only1 heuristic post-RA");
141STATISTIC(NumPhysRegPostRA,
142 "Number of scheduling units chosen for PhysReg heuristic post-RA");
143STATISTIC(NumRegExcessPostRA,
144 "Number of scheduling units chosen for RegExcess heuristic post-RA");
145STATISTIC(
146 NumRegCriticalPostRA,
147 "Number of scheduling units chosen for RegCritical heuristic post-RA");
148STATISTIC(NumStallPostRA,
149 "Number of scheduling units chosen for Stall heuristic post-RA");
150STATISTIC(NumClusterPostRA,
151 "Number of scheduling units chosen for Cluster heuristic post-RA");
152STATISTIC(NumWeakPostRA,
153 "Number of scheduling units chosen for Weak heuristic post-RA");
154STATISTIC(NumRegMaxPostRA,
155 "Number of scheduling units chosen for RegMax heuristic post-RA");
156STATISTIC(
157 NumResourceReducePostRA,
158 "Number of scheduling units chosen for ResourceReduce heuristic post-RA");
159STATISTIC(
160 NumResourceDemandPostRA,
161 "Number of scheduling units chosen for ResourceDemand heuristic post-RA");
162STATISTIC(
163 NumTopDepthReducePostRA,
164 "Number of scheduling units chosen for TopDepthReduce heuristic post-RA");
165STATISTIC(
166 NumTopPathReducePostRA,
167 "Number of scheduling units chosen for TopPathReduce heuristic post-RA");
168STATISTIC(
169 NumBotHeightReducePostRA,
170 "Number of scheduling units chosen for BotHeightReduce heuristic post-RA");
171STATISTIC(
172 NumBotPathReducePostRA,
173 "Number of scheduling units chosen for BotPathReduce heuristic post-RA");
174STATISTIC(NumNodeOrderPostRA,
175 "Number of scheduling units chosen for NodeOrder heuristic post-RA");
176STATISTIC(NumFirstValidPostRA,
177 "Number of scheduling units chosen for FirstValid heuristic post-RA");
178
180 "misched-prera-direction", cl::Hidden,
181 cl::desc("Pre reg-alloc list scheduling direction"),
184 clEnumValN(MISched::TopDown, "topdown",
185 "Force top-down pre reg-alloc list scheduling"),
186 clEnumValN(MISched::BottomUp, "bottomup",
187 "Force bottom-up pre reg-alloc list scheduling"),
188 clEnumValN(MISched::Bidirectional, "bidirectional",
189 "Force bidirectional pre reg-alloc list scheduling")));
190
192 "misched-postra-direction", cl::Hidden,
193 cl::desc("Post reg-alloc list scheduling direction"),
196 clEnumValN(MISched::TopDown, "topdown",
197 "Force top-down post reg-alloc list scheduling"),
198 clEnumValN(MISched::BottomUp, "bottomup",
199 "Force bottom-up post reg-alloc list scheduling"),
200 clEnumValN(MISched::Bidirectional, "bidirectional",
201 "Force bidirectional post reg-alloc list scheduling")));
202
203static cl::opt<bool>
205 cl::desc("Print critical path length to stdout"));
206
208 "verify-misched", cl::Hidden,
209 cl::desc("Verify machine instrs before and after machine scheduling"));
210
211#ifndef NDEBUG
213 "view-misched-dags", cl::Hidden,
214 cl::desc("Pop up a window to show MISched dags after they are processed"));
215cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden,
216 cl::desc("Print schedule DAGs"));
218 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
219 cl::desc("Dump resource usage at schedule boundary."));
221 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
222 cl::desc("Show details of invoking getNextResourceCycle."));
223#else
224const bool llvm::ViewMISchedDAGs = false;
225const bool llvm::PrintDAGs = false;
226static const bool MischedDetailResourceBooking = false;
227#ifdef LLVM_ENABLE_DUMP
228static const bool MISchedDumpReservedCycles = false;
229#endif // LLVM_ENABLE_DUMP
230#endif // NDEBUG
231
232#ifndef NDEBUG
233/// In some situations a few uninteresting nodes depend on nearly all other
234/// nodes in the graph; provide a cutoff to hide them.
235static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
236 cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
237
239 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
240
242 cl::desc("Only schedule this function"));
243static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
244 cl::desc("Only schedule this MBB#"));
245#endif // NDEBUG
246
247/// Avoid quadratic complexity in unusually large basic blocks by limiting the
248/// size of the ready lists.
250 cl::desc("Limit ready list to N instructions"), cl::init(256));
251
252static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
253 cl::desc("Enable register pressure scheduling."), cl::init(true));
254
255static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
256 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
257
259 cl::desc("Enable memop clustering."),
260 cl::init(true));
261static cl::opt<bool>
262 ForceFastCluster("force-fast-cluster", cl::Hidden,
263 cl::desc("Switch to fast cluster algorithm with the loss "
264 "of some fusion opportunities"),
265 cl::init(false));
267 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
268 cl::desc("The threshold for fast cluster"),
269 cl::init(1000));
270
271#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
273 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
274 cl::desc("Dump resource usage at schedule boundary."));
276 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
277 cl::desc("Set width of the columns with "
278 "the resources and schedule units"),
279 cl::init(19));
281 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
282 cl::desc("Set width of the columns showing resource booking."),
283 cl::init(5));
285 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
286 cl::desc("Sort the resources printed in the dump trace"));
287#endif
288
290 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
291 cl::desc("Number of intervals to track"), cl::init(10));
292
293// DAG subtrees must have at least this many nodes.
294static const unsigned MinSubtreeSize = 8;
295
296// Pin the vtables to this file.
297void MachineSchedStrategy::anchor() {}
298
299void ScheduleDAGMutation::anchor() {}
300
301//===----------------------------------------------------------------------===//
302// Machine Instruction Scheduling Pass and Registry
303//===----------------------------------------------------------------------===//
304
308
312
313namespace llvm {
314namespace impl_detail {
315
316/// Base class for the machine scheduler classes.
318protected:
319 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
320};
321
322/// Impl class for MachineScheduler.
324 // These are only for using MF.verify()
325 // remove when verify supports passing in all analyses
326 MachineFunctionPass *P = nullptr;
327 MachineFunctionAnalysisManager *MFAM = nullptr;
328
329public:
337
339 // Migration only
340 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
341 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
342
343 bool run(MachineFunction &MF, const TargetMachine &TM,
344 const RequiredAnalyses &Analyses);
345
346protected:
348};
349
350/// Impl class for PostMachineScheduler.
352 // These are only for using MF.verify()
353 // remove when verify supports passing in all analyses
354 MachineFunctionPass *P = nullptr;
355 MachineFunctionAnalysisManager *MFAM = nullptr;
356
357public:
363 // Migration only
364 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
365 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
366
367 bool run(MachineFunction &Func, const TargetMachine &TM,
368 const RequiredAnalyses &Analyses);
369
370protected:
372};
373
374} // namespace impl_detail
375} // namespace llvm
376
380
381namespace {
382/// MachineScheduler runs after coalescing and before register allocation.
383class MachineSchedulerLegacy : public MachineFunctionPass {
384 MachineSchedulerImpl Impl;
385
386public:
387 MachineSchedulerLegacy();
388 void getAnalysisUsage(AnalysisUsage &AU) const override;
389 bool runOnMachineFunction(MachineFunction&) override;
390
391 static char ID; // Class identification, replacement for typeinfo
392};
393
394/// PostMachineScheduler runs shortly before code emission.
395class PostMachineSchedulerLegacy : public MachineFunctionPass {
396 PostMachineSchedulerImpl Impl;
397
398public:
399 PostMachineSchedulerLegacy();
400 void getAnalysisUsage(AnalysisUsage &AU) const override;
401 bool runOnMachineFunction(MachineFunction &) override;
402
403 static char ID; // Class identification, replacement for typeinfo
404};
405
406} // end anonymous namespace
407
408char MachineSchedulerLegacy::ID = 0;
409
410char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;
411
412INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
413 "Machine Instruction Scheduler", false, false)
420INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
421 "Machine Instruction Scheduler", false, false)
422
423MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
425}
426
427void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
428 AU.setPreservesCFG();
439}
440
441char PostMachineSchedulerLegacy::ID = 0;
442
443char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;
444
445INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
446 "PostRA Machine Instruction Scheduler", false, false)
450INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
451 "PostRA Machine Instruction Scheduler", false, false)
452
453PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
456}
457
458void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
459 AU.setPreservesCFG();
465}
466
469
470/// A dummy default scheduler factory indicates whether the scheduler
471/// is overridden on the command line.
475
476/// MachineSchedOpt allows command line selection of the scheduler.
481 cl::desc("Machine instruction scheduler to use"));
482
484DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
486
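// Editorial sketch, not part of the original file: a target or plugin can make
// its own strategy selectable through -misched=<name> by registering a factory
// with MachineSchedRegistry. MySchedStrategy and createMySched below are
// hypothetical names used only for illustration; ScheduleDAGMILive and
// MachineSchedRegistry are the real types.
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MySchedStrategy>());
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "My custom strategy.", createMySched);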
488 "enable-misched",
489 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
490 cl::Hidden);
491
493 "enable-post-misched",
494 cl::desc("Enable the post-ra machine instruction scheduling pass."),
495 cl::init(true), cl::Hidden);
496
497/// Decrement this iterator until reaching the top or a non-debug instr.
501 assert(I != Beg && "reached the top of the region, cannot decrement");
502 while (--I != Beg) {
503 if (!I->isDebugOrPseudoInstr())
504 break;
505 }
506 return I;
507}
508
509/// Non-const version.
516
517/// If this iterator is a debug value, increment until reaching the End or a
518/// non-debug instruction.
522 for(; I != End; ++I) {
523 if (!I->isDebugOrPseudoInstr())
524 break;
525 }
526 return I;
527}
528
529/// Non-const version.
536
537/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
539 // Select the scheduler, or set the default.
541 if (Ctor != useDefaultMachineSched)
542 return Ctor(this);
543
544 // Get the default scheduler set by the target for this function.
545 ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
546 if (Scheduler)
547 return Scheduler;
548
549 // Default to GenericScheduler.
550 return createSchedLive(this);
551}
552
554 const RequiredAnalyses &Analyses) {
555 MF = &Func;
556 MLI = &Analyses.MLI;
557 MDT = &Analyses.MDT;
558 this->TM = &TM;
559 AA = &Analyses.AA;
560 LIS = &Analyses.LIS;
561 MBFI = &Analyses.MBFI;
562
563 if (VerifyScheduling) {
564 LLVM_DEBUG(LIS->dump());
565 const char *MSchedBanner = "Before machine scheduling.";
566 if (P)
567 MF->verify(P, MSchedBanner, &errs());
568 else
569 MF->verify(*MFAM, MSchedBanner, &errs());
570 }
571 RegClassInfo->runOnMachineFunction(*MF);
572
573 // Instantiate the selected scheduler for this target, function, and
574 // optimization level.
575 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
576 scheduleRegions(*Scheduler, false);
577
578 LLVM_DEBUG(LIS->dump());
579 if (VerifyScheduling) {
580 const char *MSchedBanner = "After machine scheduling.";
581 if (P)
582 MF->verify(P, MSchedBanner, &errs());
583 else
584 MF->verify(*MFAM, MSchedBanner, &errs());
585 }
586 return true;
587}
588
589/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
590/// the caller. We don't have a command line option to override the postRA
591/// scheduler. The Target must configure it.
593 // Get the postRA scheduler set by the target for this function.
594 ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
595 if (Scheduler)
596 return Scheduler;
597
598 // Default to GenericScheduler.
599 return createSchedPostRA(this);
600}
601
603 const TargetMachine &TM,
604 const RequiredAnalyses &Analyses) {
605 MF = &Func;
606 MLI = &Analyses.MLI;
607 this->TM = &TM;
608 AA = &Analyses.AA;
609
610 if (VerifyScheduling) {
611 const char *PostMSchedBanner = "Before post machine scheduling.";
612 if (P)
613 MF->verify(P, PostMSchedBanner, &errs());
614 else
615 MF->verify(*MFAM, PostMSchedBanner, &errs());
616 }
617
618 // Instantiate the selected scheduler for this target, function, and
619 // optimization level.
620 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
622
623 if (VerifyScheduling) {
624 const char *PostMSchedBanner = "After post machine scheduling.";
625 if (P)
626 MF->verify(P, PostMSchedBanner, &errs());
627 else
628 MF->verify(*MFAM, PostMSchedBanner, &errs());
629 }
630 return true;
631}
632
633/// Top-level MachineScheduler pass driver.
634///
635/// Visit blocks in function order. Divide each block into scheduling regions
636/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
637/// consistent with the DAG builder, which traverses the interior of the
638/// scheduling regions bottom-up.
639///
640/// This design avoids exposing scheduling boundaries to the DAG builder,
641/// simplifying the DAG builder's support for "special" target instructions.
642/// At the same time the design allows target schedulers to operate across
643/// scheduling boundaries, for example to bundle the boundary instructions
644/// without reordering them. This creates complexity, because the target
645/// scheduler must update the RegionBegin and RegionEnd positions cached by
646/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
647/// design would be to split blocks at scheduling boundaries, but LLVM has a
648/// general bias against block splitting purely for implementation simplicity.
649bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
650 if (skipFunction(MF.getFunction()))
651 return false;
652
653 if (EnableMachineSched.getNumOccurrences()) {
655 return false;
656 } else if (!MF.getSubtarget().enableMachineScheduler()) {
657 return false;
658 }
659
660 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
661
662 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
663 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
664 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
665 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
666 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
667 auto &MBFI = getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
668 Impl.setLegacyPass(this);
669 return Impl.run(MF, TM, {MLI, MDT, AA, LIS, MBFI});
670}
671
673 : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
676 default;
677
679 : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
681 PostMachineSchedulerPass &&Other) = default;
683
687 if (EnableMachineSched.getNumOccurrences()) {
689 return PreservedAnalyses::all();
690 } else if (!MF.getSubtarget().enableMachineScheduler()) {
691 return PreservedAnalyses::all();
692 }
693
694 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
695 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
696 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
698 .getManager();
699 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
700 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
701 auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
702
703 Impl->setMFAM(&MFAM);
704 bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS, MBFI});
705 if (!Changed)
706 return PreservedAnalyses::all();
707
710 .preserve<SlotIndexesAnalysis>()
711 .preserve<LiveIntervalsAnalysis>();
712}
713
714bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
715 if (skipFunction(MF.getFunction()))
716 return false;
717
718 if (EnablePostRAMachineSched.getNumOccurrences()) {
720 return false;
721 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
722 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
723 return false;
724 }
725 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
726 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
727 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
728 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
729 Impl.setLegacyPass(this);
730 return Impl.run(MF, TM, {MLI, AA});
731}
732
736 if (EnablePostRAMachineSched.getNumOccurrences()) {
738 return PreservedAnalyses::all();
739 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
740 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
741 return PreservedAnalyses::all();
742 }
743 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
744 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
746 .getManager();
747 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
748
749 Impl->setMFAM(&MFAM);
750 bool Changed = Impl->run(MF, *TM, {MLI, AA});
751 if (!Changed)
752 return PreservedAnalyses::all();
753
756 return PA;
757}
758
759/// Return true if the given instruction should not be included in a scheduling
760/// region.
761///
762/// MachineScheduler does not currently support scheduling across calls. To
763/// handle calls, the DAG builder needs to be modified to create register
764/// anti/output dependencies on the registers clobbered by the call's regmask
765/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
766/// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
767/// the boundary, but there would be no benefit to postRA scheduling across
768/// calls this late anyway.
771 MachineFunction *MF,
772 const TargetInstrInfo *TII) {
773 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
774 MI->isFakeUse();
775}
776
778
779static void
781 MBBRegionsVector &Regions,
782 bool RegionsTopDown) {
783 MachineFunction *MF = MBB->getParent();
785
787 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
788 RegionEnd != MBB->begin(); RegionEnd = I) {
789
790 // Avoid decrementing RegionEnd for blocks with no terminator.
791 if (RegionEnd != MBB->end() ||
792 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
793 --RegionEnd;
794 }
795
796 // The next region starts above the previous region. Look backward in the
797 // instruction stream until we find the nearest boundary.
798 unsigned NumRegionInstrs = 0;
799 I = RegionEnd;
800 for (;I != MBB->begin(); --I) {
801 MachineInstr &MI = *std::prev(I);
802 if (isSchedBoundary(&MI, &*MBB, MF, TII))
803 break;
804 if (!MI.isDebugOrPseudoInstr()) {
805 // MBB::size() uses instr_iterator to count. Here we need a bundle to
806 // count as a single instruction.
807 ++NumRegionInstrs;
808 }
809 }
810
811 // It's possible we found a scheduling region that only has debug
812 // instructions. Don't bother scheduling these.
813 if (NumRegionInstrs != 0)
814 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
815 }
816
817 if (RegionsTopDown)
818 std::reverse(Regions.begin(), Regions.end());
819}
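// Editorial example, not part of the original file (assumes a target where
// calls and terminators are scheduling boundaries): for a block
//   %a = load; %b = add; call @f; %c = sub; ret
// the loop above finds two regions bottom-up, [%c, ret) and [%a, call @f),
// each keeping its boundary instruction out of the DAG, and the list is
// reversed when RegionsTopDown is true.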
820
821/// Main driver for both MachineScheduler and PostMachineScheduler.
823 bool FixKillFlags) {
824 // Visit all machine basic blocks.
825 //
826 // TODO: Visit blocks in global postorder or postorder within the bottom-up
827 // loop tree. Then we can optionally compute global RegPressure.
828 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
829 MBB != MBBEnd; ++MBB) {
830
831 Scheduler.startBlock(&*MBB);
832
833#ifndef NDEBUG
834 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
835 continue;
836 if (SchedOnlyBlock.getNumOccurrences()
837 && (int)SchedOnlyBlock != MBB->getNumber())
838 continue;
839#endif
840
841 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
842 // points to the scheduling boundary at the bottom of the region. The DAG
843 // does not include RegionEnd, but the region does (i.e. the next
844 // RegionEnd is above the previous RegionBegin). If the current block has
845 // no terminator then RegionEnd == MBB->end() for the bottom region.
846 //
847 // All the regions of MBB are first found and stored in MBBRegions, which
848 // are then processed top-down within the MBB if doMBBSchedRegionsTopDown() is true.
849 //
850 // The Scheduler may insert instructions during either schedule() or
851 // exitRegion(), even for empty regions. So the local iterators 'I' and
852 // 'RegionEnd' are invalid across these calls. Instructions must not be
853 // added to other regions than the current one without updating MBBRegions.
854
855 MBBRegionsVector MBBRegions;
856 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
857 bool ScheduleSingleMI = Scheduler.shouldScheduleSingleMIRegions();
858 for (const SchedRegion &R : MBBRegions) {
859 MachineBasicBlock::iterator I = R.RegionBegin;
860 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
861 unsigned NumRegionInstrs = R.NumRegionInstrs;
862
863 // Notify the scheduler of the region, even if we may skip scheduling
864 // it. Perhaps it still needs to be bundled.
865 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
866
867 // Skip empty scheduling regions and, conditionally, regions with a single
868 // MI.
869 if (I == RegionEnd || (!ScheduleSingleMI && I == std::prev(RegionEnd))) {
870 // Close the current region. Bundle the terminator if needed.
871 // This invalidates 'RegionEnd' and 'I'.
872 Scheduler.exitRegion();
873 continue;
874 }
875 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
876 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
877 << " " << MBB->getName() << "\n From: " << *I
878 << " To: ";
879 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
880 else dbgs() << "End\n";
881 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
883 errs() << MF->getName();
884 errs() << ":%bb. " << MBB->getNumber();
885 errs() << " " << MBB->getName() << " \n";
886 }
887
888 // Schedule a region: possibly reorder instructions.
889 // This invalidates the original region iterators.
890 Scheduler.schedule();
891
892 // Close the current region.
893 Scheduler.exitRegion();
894 }
895 Scheduler.finishBlock();
896 // FIXME: Ideally, no further passes should rely on kill flags. However,
897 // thumb2 size reduction is currently an exception, so the PostMIScheduler
898 // needs to do this.
899 if (FixKillFlags)
900 Scheduler.fixupKills(*MBB);
901 }
902 Scheduler.finalizeSchedule();
903}
904
905#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
907 dbgs() << "Queue " << Name << ": ";
908 for (const SUnit *SU : Queue)
909 dbgs() << SU->NodeNum << " ";
910 dbgs() << "\n";
911}
912#endif
913
914//===----------------------------------------------------------------------===//
915// ScheduleDAGMI - Basic machine instruction scheduling. This is
916// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
917// virtual registers.
918// ===----------------------------------------------------------------------===/
919
920// Provide a vtable anchor.
922
923/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
924/// NumPredsLeft reaches zero, release the successor node.
925///
926/// FIXME: Adjust SuccSU height based on MinLatency.
928 SUnit *SuccSU = SuccEdge->getSUnit();
929
930 if (SuccEdge->isWeak()) {
931 --SuccSU->WeakPredsLeft;
932 return;
933 }
934#ifndef NDEBUG
935 if (SuccSU->NumPredsLeft == 0) {
936 dbgs() << "*** Scheduling failed! ***\n";
937 dumpNode(*SuccSU);
938 dbgs() << " has been released too many times!\n";
939 llvm_unreachable(nullptr);
940 }
941#endif
942 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
943 // CurrCycle may have advanced since then.
944 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
945 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
946
947 --SuccSU->NumPredsLeft;
948 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
949 SchedImpl->releaseTopNode(SuccSU);
950}
951
952/// releaseSuccessors - Call releaseSucc on each of SU's successors.
954 for (SDep &Succ : SU->Succs)
955 releaseSucc(SU, &Succ);
956}
957
958/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
959/// NumSuccsLeft reaches zero, release the predecessor node.
960///
961/// FIXME: Adjust PredSU height based on MinLatency.
963 SUnit *PredSU = PredEdge->getSUnit();
964
965 if (PredEdge->isWeak()) {
966 --PredSU->WeakSuccsLeft;
967 return;
968 }
969#ifndef NDEBUG
970 if (PredSU->NumSuccsLeft == 0) {
971 dbgs() << "*** Scheduling failed! ***\n";
972 dumpNode(*PredSU);
973 dbgs() << " has been released too many times!\n";
974 llvm_unreachable(nullptr);
975 }
976#endif
977 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
978 // CurrCycle may have advanced since then.
979 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
980 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
981
982 --PredSU->NumSuccsLeft;
983 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
984 SchedImpl->releaseBottomNode(PredSU);
985}
986
987/// releasePredecessors - Call releasePred on each of SU's predecessors.
989 for (SDep &Pred : SU->Preds)
990 releasePred(SU, &Pred);
991}
992
997
1002
1003/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
1004/// after crossing a scheduling boundary. [begin, end) includes all instructions
1005/// in the region, including the boundary itself and single-instruction regions
1006/// that don't get scheduled.
1010 unsigned regioninstrs)
1011{
1012 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
1013
1014 SchedImpl->initPolicy(begin, end, regioninstrs);
1015
1016 // Set dump direction after initializing sched policy.
1018 if (SchedImpl->getPolicy().OnlyTopDown)
1020 else if (SchedImpl->getPolicy().OnlyBottomUp)
1022 else
1025}
1026
1027/// This is normally called from the main scheduler loop but may also be invoked
1028/// by the scheduling strategy to perform additional code motion.
1031 // Advance RegionBegin if the first instruction moves down.
1032 if (&*RegionBegin == MI)
1033 ++RegionBegin;
1034
1035 // Update the instruction stream.
1036 BB->splice(InsertPos, BB, MI);
1037
1038 // Update LiveIntervals
1039 if (LIS)
1040 LIS->handleMove(*MI, /*UpdateFlags=*/true);
1041
1042 // Recede RegionBegin if an instruction moves above the first.
1043 if (RegionBegin == InsertPos)
1044 RegionBegin = MI;
1045}
1046
1048#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
1049 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
1051 return false;
1052 }
1053 ++NumInstrsScheduled;
1054#endif
1055 return true;
1056}
1057
1058/// Per-region scheduling driver, called back from
1059/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
1060/// that does not consider liveness or register pressure. It is useful for
1061/// PostRA scheduling and potentially other custom schedulers.
1063 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
1064 LLVM_DEBUG(SchedImpl->dumpPolicy());
1065
1066 // Build the DAG.
1068
1070
1071 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1072 findRootsAndBiasEdges(TopRoots, BotRoots);
1073
1074 LLVM_DEBUG(dump());
1075 if (PrintDAGs) dump();
1077
1078 // Initialize the strategy before modifying the DAG.
1079 // This may initialize a DFSResult to be used for queue priority.
1080 SchedImpl->initialize(this);
1081
1082 // Initialize ready queues now that the DAG and priority data are finalized.
1083 initQueues(TopRoots, BotRoots);
1084
1085 bool IsTopNode = false;
1086 while (true) {
1087 if (!checkSchedLimit())
1088 break;
1089
1090 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
1091 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1092 if (!SU) break;
1093
1094 assert(!SU->isScheduled && "Node already scheduled");
1095
1096 MachineInstr *MI = SU->getInstr();
1097 if (IsTopNode) {
1098 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1099 if (&*CurrentTop == MI)
1101 else
1103 } else {
1104 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1107 if (&*priorII == MI)
1108 CurrentBottom = priorII;
1109 else {
1110 if (&*CurrentTop == MI)
1111 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1113 CurrentBottom = MI;
1114 }
1115 }
1116 // Notify the scheduling strategy before updating the DAG.
1117 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
1118 // runs, it can then use the accurate ReadyCycle time to determine whether
1119 // newly released nodes can move to the readyQ.
1120 SchedImpl->schedNode(SU, IsTopNode);
1121
1122 updateQueues(SU, IsTopNode);
1123 }
1124 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1125
1127
1128 LLVM_DEBUG({
1129 dbgs() << "*** Final schedule for "
1130 << printMBBReference(*begin()->getParent()) << " ***\n";
1131 dumpSchedule();
1132 dbgs() << '\n';
1133 });
1134}
1135
1136/// Apply each ScheduleDAGMutation step in order.
1138 for (auto &m : Mutations)
1139 m->apply(this);
1140}
1141
1144 SmallVectorImpl<SUnit*> &BotRoots) {
1145 for (SUnit &SU : SUnits) {
1146 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
1147
1148 // Order predecessors so DFSResult follows the critical path.
1149 SU.biasCriticalPath();
1150
1151 // A SUnit is ready to top schedule if it has no predecessors.
1152 if (!SU.NumPredsLeft)
1153 TopRoots.push_back(&SU);
1154 // A SUnit is ready to bottom schedule if it has no successors.
1155 if (!SU.NumSuccsLeft)
1156 BotRoots.push_back(&SU);
1157 }
1158 ExitSU.biasCriticalPath();
1159}
1160
1161/// Identify DAG roots and set up scheduler queues.
1163 ArrayRef<SUnit *> BotRoots) {
1164 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
1165 //
1166 // Nodes with unreleased weak edges can still be roots.
1167 // Release top roots in forward order.
1168 for (SUnit *SU : TopRoots)
1169 SchedImpl->releaseTopNode(SU);
1170
1171 // Release bottom roots in reverse order so the higher priority nodes appear
1172 // first. This is more natural and slightly more efficient.
1174 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
1175 SchedImpl->releaseBottomNode(*I);
1176 }
1177
1180
1181 SchedImpl->registerRoots();
1182
1183 // Advance past initial DebugValues.
1186}
1187
1188/// Update scheduler queues after scheduling an instruction.
1189void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
1190 // Release dependent instructions for scheduling.
1191 if (IsTopNode)
1193 else
1195
1196 SU->isScheduled = true;
1197}
1198
1199/// Reinsert any remaining debug_values, just like the PostRA scheduler.
1201 // If first instruction was a DBG_VALUE then put it back.
1202 if (FirstDbgValue) {
1203 BB->splice(RegionBegin, BB, FirstDbgValue);
1205 }
1206
1207 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
1208 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
1209 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
1210 MachineInstr *DbgValue = P.first;
1211 MachineBasicBlock::iterator OrigPrevMI = P.second;
1212 if (&*RegionBegin == DbgValue)
1213 ++RegionBegin;
1214 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
1215 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
1217 }
1218}
1219
1220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1221static const char *scheduleTableLegend = " i: issue\n x: resource booked";
1222
1224 // Bail out when there is no schedule model to query.
1225 if (!SchedModel.hasInstrSchedModel())
1226 return;
1227
1228 // Nothing to show if there are no instructions or only one.
1229 if (BB->size() < 2)
1230 return;
1231
1232 dbgs() << " * Schedule table (TopDown):\n";
1233 dbgs() << scheduleTableLegend << "\n";
1234 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1235 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1236 for (MachineInstr &MI : *this) {
1237 SUnit *SU = getSUnit(&MI);
1238 if (!SU)
1239 continue;
1240 const MCSchedClassDesc *SC = getSchedClass(SU);
1241 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1242 PE = SchedModel.getWriteProcResEnd(SC);
1243 PI != PE; ++PI) {
1244 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1245 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1246 }
1247 }
1248 // Print the header with the cycles
1249 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1250 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1251 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1252 dbgs() << "|\n";
1253
1254 for (MachineInstr &MI : *this) {
1255 SUnit *SU = getSUnit(&MI);
1256 if (!SU) {
1257 dbgs() << "Missing SUnit\n";
1258 continue;
1259 }
1260 std::string NodeName("SU(");
1261 NodeName += std::to_string(SU->NodeNum) + ")";
1262 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1263 unsigned C = FirstCycle;
1264 for (; C <= LastCycle; ++C) {
1265 if (C == SU->TopReadyCycle)
1266 dbgs() << llvm::left_justify("| i", ColWidth);
1267 else
1268 dbgs() << llvm::left_justify("|", ColWidth);
1269 }
1270 dbgs() << "|\n";
1271 const MCSchedClassDesc *SC = getSchedClass(SU);
1272
1274 make_range(SchedModel.getWriteProcResBegin(SC),
1275 SchedModel.getWriteProcResEnd(SC)));
1276
1279 ResourcesIt,
1280 [](const MCWriteProcResEntry &LHS,
1281 const MCWriteProcResEntry &RHS) -> bool {
1282 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1283 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1284 });
1285 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1286 C = FirstCycle;
1287 const std::string ResName =
1288 SchedModel.getResourceName(PI.ProcResourceIdx);
1289 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1290 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1291 dbgs() << llvm::left_justify("|", ColWidth);
1292 }
1293 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1294 ++I, ++C)
1295 dbgs() << llvm::left_justify("| x", ColWidth);
1296 while (C++ <= LastCycle)
1297 dbgs() << llvm::left_justify("|", ColWidth);
1298 // Place end char
1299 dbgs() << "| \n";
1300 }
1301 }
1302}
1303
1305 // Bail out when there is no schedule model to query.
1306 if (!SchedModel.hasInstrSchedModel())
1307 return;
1308
1309 // Nothing to show if there are no instructions or only one.
1310 if (BB->size() < 2)
1311 return;
1312
1313 dbgs() << " * Schedule table (BottomUp):\n";
1314 dbgs() << scheduleTableLegend << "\n";
1315
1316 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1317 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1318 for (MachineInstr &MI : *this) {
1319 SUnit *SU = getSUnit(&MI);
1320 if (!SU)
1321 continue;
1322 const MCSchedClassDesc *SC = getSchedClass(SU);
1323 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1324 PE = SchedModel.getWriteProcResEnd(SC);
1325 PI != PE; ++PI) {
1326 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1327 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1328 }
1329 }
1330 // Print the header with the cycles
1331 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1332 for (int C = FirstCycle; C >= LastCycle; --C)
1333 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1334 dbgs() << "|\n";
1335
1336 for (MachineInstr &MI : *this) {
1337 SUnit *SU = getSUnit(&MI);
1338 if (!SU) {
1339 dbgs() << "Missing SUnit\n";
1340 continue;
1341 }
1342 std::string NodeName("SU(");
1343 NodeName += std::to_string(SU->NodeNum) + ")";
1344 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1345 int C = FirstCycle;
1346 for (; C >= LastCycle; --C) {
1347 if (C == (int)SU->BotReadyCycle)
1348 dbgs() << llvm::left_justify("| i", ColWidth);
1349 else
1350 dbgs() << llvm::left_justify("|", ColWidth);
1351 }
1352 dbgs() << "|\n";
1353 const MCSchedClassDesc *SC = getSchedClass(SU);
1355 make_range(SchedModel.getWriteProcResBegin(SC),
1356 SchedModel.getWriteProcResEnd(SC)));
1357
1360 ResourcesIt,
1361 [](const MCWriteProcResEntry &LHS,
1362 const MCWriteProcResEntry &RHS) -> bool {
1363 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1364 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1365 });
1366 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1367 C = FirstCycle;
1368 const std::string ResName =
1369 SchedModel.getResourceName(PI.ProcResourceIdx);
1370 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1371 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1372 dbgs() << llvm::left_justify("|", ColWidth);
1373 }
1374 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1375 ++I, --C)
1376 dbgs() << llvm::left_justify("| x", ColWidth);
1377 while (C-- >= LastCycle)
1378 dbgs() << llvm::left_justify("|", ColWidth);
1379 // Place end char
1380 dbgs() << "| \n";
1381 }
1382 }
1383}
1384#endif
1385
1386#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1391 else if (DumpDir == DumpDirection::BottomUp)
1394 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1395 } else {
1396 dbgs() << "* Schedule table: DumpDirection not set.\n";
1397 }
1398 }
1399
1400 for (MachineInstr &MI : *this) {
1401 if (SUnit *SU = getSUnit(&MI))
1402 dumpNode(*SU);
1403 else
1404 dbgs() << "Missing SUnit\n";
1405 }
1406}
1407#endif
1408
1409//===----------------------------------------------------------------------===//
1410// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1411// preservation.
1412//===----------------------------------------------------------------------===//
1413
1417
1419 const MachineInstr &MI = *SU.getInstr();
1420 for (const MachineOperand &MO : MI.operands()) {
1421 if (!MO.isReg())
1422 continue;
1423 if (!MO.readsReg())
1424 continue;
1425 if (TrackLaneMasks && !MO.isUse())
1426 continue;
1427
1428 Register Reg = MO.getReg();
1429 if (!Reg.isVirtual())
1430 continue;
1431
1432 // Ignore re-defs.
1433 if (TrackLaneMasks) {
1434 bool FoundDef = false;
1435 for (const MachineOperand &MO2 : MI.all_defs()) {
1436 if (MO2.getReg() == Reg && !MO2.isDead()) {
1437 FoundDef = true;
1438 break;
1439 }
1440 }
1441 if (FoundDef)
1442 continue;
1443 }
1444
1445 // Record this local VReg use.
1447 for (; UI != VRegUses.end(); ++UI) {
1448 if (UI->SU == &SU)
1449 break;
1450 }
1451 if (UI == VRegUses.end())
1452 VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1453 }
1454}
1455
1456/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1457/// crossing a scheduling boundary. [begin, end) includes all instructions in
1458/// the region, including the boundary itself and single-instruction regions
1459/// that don't get scheduled.
1463 unsigned regioninstrs)
1464{
1465 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1466 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1467
1468 // For convenience remember the end of the liveness region.
1469 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1470
1471 SUPressureDiffs.clear();
1472
1473 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1474 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1475
1477 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1478}
1479
1480// Set up the register pressure trackers for the top scheduled and bottom
1481// scheduled regions.
1483 VRegUses.clear();
1484 VRegUses.setUniverse(MRI.getNumVirtRegs());
1485 for (SUnit &SU : SUnits)
1486 collectVRegUses(SU);
1487
1489 ShouldTrackLaneMasks, false);
1491 ShouldTrackLaneMasks, false);
1492
1493 // Close the RPTracker to finalize live ins.
1494 RPTracker.closeRegion();
1495
1496 LLVM_DEBUG(RPTracker.dump());
1497
1498 // Initialize the live ins and live outs.
1499 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1500 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1501
1502 // Close one end of the tracker so we can call
1503 // getMaxUpward/DownwardPressureDelta before advancing across any
1504 // instructions. This converts currently live regs into live ins/outs.
1505 TopRPTracker.closeTop();
1506 BotRPTracker.closeBottom();
1507
1508 BotRPTracker.initLiveThru(RPTracker);
1509 if (!BotRPTracker.getLiveThru().empty()) {
1510 TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1511 LLVM_DEBUG(dbgs() << "Live Thru: ";
1512 dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1513 };
1514
1515 // For each live out vreg reduce the pressure change associated with other
1516 // uses of the same vreg below the live-out reaching def.
1517 updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1518
1519 // Account for liveness generated by the region boundary.
1520 if (LiveRegionEnd != RegionEnd) {
1522 BotRPTracker.recede(&LiveUses);
1523 updatePressureDiffs(LiveUses);
1524 }
1525
1526 LLVM_DEBUG(dbgs() << "Top Pressure: ";
1527 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1528 dbgs() << "Bottom Pressure: ";
1529 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1530
1531 assert((BotRPTracker.getPos() == RegionEnd ||
1532 (RegionEnd->isDebugInstr() &&
1534 "Can't find the region bottom");
1535
1536 // Cache the list of excess pressure sets in this region. This will also track
1537 // the max pressure in the scheduled code for these sets.
1538 RegionCriticalPSets.clear();
1539 const std::vector<unsigned> &RegionPressure =
1540 RPTracker.getPressure().MaxSetPressure;
1541 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1542 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1543 if (RegionPressure[i] > Limit) {
1544 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1545 << " Actual " << RegionPressure[i] << "\n");
1546 RegionCriticalPSets.push_back(PressureChange(i));
1547 }
1548 }
1549 LLVM_DEBUG({
1550 if (RegionCriticalPSets.size() > 0) {
1551 dbgs() << "Excess PSets: ";
1552 for (const PressureChange &RCPS : RegionCriticalPSets)
1553 dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1554 dbgs() << "\n";
1555 }
1556 });
1557}
1558
1561 const std::vector<unsigned> &NewMaxPressure) {
1562 const PressureDiff &PDiff = getPressureDiff(SU);
1563 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1564 for (const PressureChange &PC : PDiff) {
1565 if (!PC.isValid())
1566 break;
1567 unsigned ID = PC.getPSet();
1568 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1569 ++CritIdx;
1570 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1571 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1572 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1573 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1574 }
1575 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1576 if (NewMaxPressure[ID] >= Limit - 2) {
1577 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1578 << NewMaxPressure[ID]
1579 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1580 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1581 << " livethru)\n");
1582 }
1583 }
1584}
1585
1586/// Update the PressureDiff array for liveness after scheduling this
1587/// instruction.
1589 for (const VRegMaskOrUnit &P : LiveUses) {
1590 /// FIXME: Currently assuming single-use physregs.
1591 if (!P.VRegOrUnit.isVirtualReg())
1592 continue;
1593 Register Reg = P.VRegOrUnit.asVirtualReg();
1594
1596 // If the register has just become live then other uses won't change
1597 // this fact anymore => decrement pressure.
1598 // If the register has just become dead then other uses make it come
1599 // back to life => increment pressure.
1600 bool Decrement = P.LaneMask.any();
1601
1602 for (const VReg2SUnit &V2SU
1603 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1604 SUnit &SU = *V2SU.SU;
1605 if (SU.isScheduled || &SU == &ExitSU)
1606 continue;
1607
1608 PressureDiff &PDiff = getPressureDiff(&SU);
1609 PDiff.addPressureChange(VirtRegOrUnit(Reg), Decrement, &MRI);
1610 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1611 return Change.isValid();
1612 }))
1614 << " UpdateRegPressure: SU(" << SU.NodeNum << ") "
1615 << printReg(Reg, TRI) << ':'
1616 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1617 dbgs() << " to "; PDiff.dump(*TRI););
1618 }
1619 } else {
1620 assert(P.LaneMask.any());
1621 LLVM_DEBUG(dbgs() << " LiveReg: " << printReg(Reg, TRI) << "\n");
1622 // This may be called before CurrentBottom has been initialized. However,
1623 // BotRPTracker must have a valid position. We want the value live into the
1624 // instruction or live out of the block, so ask for the previous
1625 // instruction's live-out.
1626 const LiveInterval &LI = LIS->getInterval(Reg);
1627 VNInfo *VNI;
1629 nextIfDebug(BotRPTracker.getPos(), BB->end());
1630 if (I == BB->end())
1631 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1632 else {
1633 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1634 VNI = LRQ.valueIn();
1635 }
1636 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1637 assert(VNI && "No live value at use.");
1638 for (const VReg2SUnit &V2SU
1639 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1640 SUnit *SU = V2SU.SU;
1641 // If this use comes before the reaching def, it cannot be a last use,
1642 // so decrease its pressure change.
1643 if (!SU->isScheduled && SU != &ExitSU) {
1644 LiveQueryResult LRQ =
1645 LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1646 if (LRQ.valueIn() == VNI) {
1647 PressureDiff &PDiff = getPressureDiff(SU);
1648 PDiff.addPressureChange(VirtRegOrUnit(Reg), true, &MRI);
1649 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1650 return Change.isValid();
1651 }))
1652 LLVM_DEBUG(dbgs() << " UpdateRegPressure: SU(" << SU->NodeNum
1653 << ") " << *SU->getInstr();
1654 dbgs() << " to ";
1655 PDiff.dump(*TRI););
1656 }
1657 }
1658 }
1659 }
1660 }
1661}
1662
1664#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1665 if (EntrySU.getInstr() != nullptr)
1667 for (const SUnit &SU : SUnits) {
1668 dumpNodeAll(SU);
1669 if (ShouldTrackPressure) {
1670 dbgs() << " Pressure Diff : ";
1671 getPressureDiff(&SU).dump(*TRI);
1672 }
1673 dbgs() << " Single Issue : ";
1674 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1675 SchedModel.mustEndGroup(SU.getInstr()))
1676 dbgs() << "true;";
1677 else
1678 dbgs() << "false;";
1679 dbgs() << '\n';
1680 }
1681 if (ExitSU.getInstr() != nullptr)
1683#endif
1684}
1685
1686/// schedule - Called back from MachineScheduler::runOnMachineFunction
1687/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1688/// only includes instructions that have DAG nodes, not scheduling boundaries.
1689///
1690/// This is a skeletal driver, with all the functionality pushed into helpers,
1691/// so that it can be easily extended by experimental schedulers. Generally,
1692/// implementing MachineSchedStrategy should be sufficient to implement a new
1693/// scheduling algorithm. However, if a scheduler further subclasses
1694/// ScheduleDAGMILive then it will want to override this virtual method in order
1695/// to update any specialized state.
1697 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1698 LLVM_DEBUG(SchedImpl->dumpPolicy());
1700
1702
1703 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1704 findRootsAndBiasEdges(TopRoots, BotRoots);
1705
1706 // Initialize the strategy before modifying the DAG.
1707 // This may initialize a DFSResult to be used for queue priority.
1708 SchedImpl->initialize(this);
1709
1710 LLVM_DEBUG(dump());
1711 if (PrintDAGs) dump();
1713
1714 // Initialize ready queues now that the DAG and priority data are finalized.
1715 initQueues(TopRoots, BotRoots);
1716
1717 bool IsTopNode = false;
1718 while (true) {
1719 if (!checkSchedLimit())
1720 break;
1721
1722 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1723 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1724 if (!SU) break;
1725
1726 assert(!SU->isScheduled && "Node already scheduled");
1727
1728 scheduleMI(SU, IsTopNode);
1729
1730 if (DFSResult) {
1731 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1732 if (!ScheduledTrees.test(SubtreeID)) {
1733 ScheduledTrees.set(SubtreeID);
1734 DFSResult->scheduleTree(SubtreeID);
1735 SchedImpl->scheduleTree(SubtreeID);
1736 }
1737 }
1738
1739 // Notify the scheduling strategy after updating the DAG.
1740 SchedImpl->schedNode(SU, IsTopNode);
1741
1742 updateQueues(SU, IsTopNode);
1743 }
1744 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1745
1747
1748 LLVM_DEBUG({
1749 dbgs() << "*** Final schedule for "
1750 << printMBBReference(*begin()->getParent()) << " ***\n";
1751 dumpSchedule();
1752 dbgs() << '\n';
1753 });
1754}
1755
1756/// Build the DAG and set up three register pressure trackers.
1758 if (!ShouldTrackPressure) {
1759 RPTracker.reset();
1760 RegionCriticalPSets.clear();
1762 return;
1763 }
1764
1765 // Initialize the register pressure tracker used by buildSchedGraph.
1767 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1768
1769 // Account for liveness generated by the region boundary.
1770 if (LiveRegionEnd != RegionEnd)
1771 RPTracker.recede();
1772
1773 // Build the DAG, and compute current register pressure.
1775
1776 // Initialize top/bottom trackers after computing region pressure.
1778}
1779
1781 if (!DFSResult)
1782 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1783 DFSResult->clear();
1784 ScheduledTrees.clear();
1785 DFSResult->resize(SUnits.size());
1786 DFSResult->compute(SUnits);
1787 ScheduledTrees.resize(DFSResult->getNumSubtrees());
1788}
1789
1790/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1791/// only provides the critical path for single block loops. To handle loops that
1792/// span blocks, we could use the vreg path latencies provided by
1793/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1794/// available for use in the scheduler.
1795///
1796/// The cyclic path estimation identifies a def-use pair that crosses the back
1797/// edge and considers the depth and height of the nodes. For example, consider
1798/// the following instruction sequence where each instruction has unit latency
1799/// and defines an eponymous virtual register:
1800///
1801/// a->b(a,c)->c(b)->d(c)->exit
1802///
1803/// The cyclic critical path is two cycles: b->c->b
1804/// The acyclic critical path is four cycles: a->b->c->d->exit
1805/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1806/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1807/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1808/// LiveInDepth = depth(b) = len(a->b) = 1
1809///
1810/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1811/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1812/// CyclicCriticalPath = min(2, 2) = 2
1813///
1814/// This could be relevant to PostRA scheduling, but is currently implemented
1815/// assuming LiveIntervals.
1817 // This only applies to single-block loops.
1818 if (!BB->isSuccessor(BB))
1819 return 0;
1820
1821 unsigned MaxCyclicLatency = 0;
1822 // Visit each live out vreg def to find def/use pairs that cross iterations.
1823 for (const VRegMaskOrUnit &P : RPTracker.getPressure().LiveOutRegs) {
1824 if (!P.VRegOrUnit.isVirtualReg())
1825 continue;
1826 Register Reg = P.VRegOrUnit.asVirtualReg();
1827 const LiveInterval &LI = LIS->getInterval(Reg);
1828 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1829 if (!DefVNI)
1830 continue;
1831
1832 MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1833 const SUnit *DefSU = getSUnit(DefMI);
1834 if (!DefSU)
1835 continue;
1836
1837 unsigned LiveOutHeight = DefSU->getHeight();
1838 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1839 // Visit all local users of the vreg def.
1840 for (const VReg2SUnit &V2SU
1841 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1842 SUnit *SU = V2SU.SU;
1843 if (SU == &ExitSU)
1844 continue;
1845
1846 // Only consider uses of the phi.
1847 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1848 if (!LRQ.valueIn()->isPHIDef())
1849 continue;
1850
1851 // Assume that a path spanning two iterations is a cycle, which could
1852 // overestimate in strange cases. This allows cyclic latency to be
1853 // estimated as the minimum slack of the vreg's depth or height.
1854 unsigned CyclicLatency = 0;
1855 if (LiveOutDepth > SU->getDepth())
1856 CyclicLatency = LiveOutDepth - SU->getDepth();
1857
1858 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1859 if (LiveInHeight > LiveOutHeight) {
1860 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1861 CyclicLatency = LiveInHeight - LiveOutHeight;
1862 } else
1863 CyclicLatency = 0;
1864
1865 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1866 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1867 if (CyclicLatency > MaxCyclicLatency)
1868 MaxCyclicLatency = CyclicLatency;
1869 }
1870 }
1871 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1872 return MaxCyclicLatency;
1873}
1874
1875/// Release ExitSU predecessors and set up scheduler queues. Re-position
1876/// the Top RP tracker in case the region beginning has changed.
1878 ArrayRef<SUnit*> BotRoots) {
1879 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1880 if (ShouldTrackPressure) {
1881 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1882 TopRPTracker.setPos(CurrentTop);
1883 }
1884}
1885
1886/// Move an instruction and update register pressure.
1887void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1888 // Move the instruction to its new location in the instruction stream.
1889 MachineInstr *MI = SU->getInstr();
1890
1891 if (IsTopNode) {
1892 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1893 if (&*CurrentTop == MI)
1895 else {
1897 TopRPTracker.setPos(MI);
1898 }
1899
1900 if (ShouldTrackPressure) {
1901 // Update top scheduled pressure.
1902 RegisterOperands RegOpers;
1903 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1904 /*IgnoreDead=*/false);
1906 // Adjust liveness and add missing dead+read-undef flags.
1907 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1908 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1909 } else {
1910 // Adjust for missing dead-def flags.
1911 RegOpers.detectDeadDefs(*MI, *LIS);
1912 }
1913
1914 TopRPTracker.advance(RegOpers);
1915 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1916 LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
1917 TopRPTracker.getRegSetPressureAtPos(), TRI););
1918
1919 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1920 }
1921 } else {
1922 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1925 if (&*priorII == MI)
1926 CurrentBottom = priorII;
1927 else {
1928 if (&*CurrentTop == MI) {
1929 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1930 TopRPTracker.setPos(CurrentTop);
1931 }
1933 CurrentBottom = MI;
1935 }
1936 if (ShouldTrackPressure) {
1937 RegisterOperands RegOpers;
1938 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1939 /*IgnoreDead=*/false);
1941 // Adjust liveness and add missing dead+read-undef flags.
1942 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1943 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1944 } else {
1945 // Adjust for missing dead-def flags.
1946 RegOpers.detectDeadDefs(*MI, *LIS);
1947 }
1948
1949 if (BotRPTracker.getPos() != CurrentBottom)
1950 BotRPTracker.recedeSkipDebugValues();
1952 BotRPTracker.recede(RegOpers, &LiveUses);
1953 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1954 LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
1955 BotRPTracker.getRegSetPressureAtPos(), TRI););
1956
1957 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1958 updatePressureDiffs(LiveUses);
1959 }
1960 }
1961}
1962
1963//===----------------------------------------------------------------------===//
1964// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1965//===----------------------------------------------------------------------===//
1966
1967namespace {
1968
1969/// Post-process the DAG to create cluster edges between neighboring
1970/// loads or between neighboring stores.
1971class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1972 struct MemOpInfo {
1973 SUnit *SU;
1975 int64_t Offset;
1976 LocationSize Width;
1977 bool OffsetIsScalable;
1978
1979 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1980 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1981 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1982 OffsetIsScalable(OffsetIsScalable) {}
1983
1984 static bool Compare(const MachineOperand *const &A,
1985 const MachineOperand *const &B) {
1986 if (A->getType() != B->getType())
1987 return A->getType() < B->getType();
1988 if (A->isReg())
1989 return A->getReg() < B->getReg();
1990 if (A->isFI()) {
1991 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1993 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1995 return StackGrowsDown ? A->getIndex() > B->getIndex()
1996 : A->getIndex() < B->getIndex();
1997 }
1998
1999 llvm_unreachable("MemOpClusterMutation only supports register or frame "
2000 "index bases.");
2001 }
2002
2003 bool operator<(const MemOpInfo &RHS) const {
2004 // FIXME: Don't compare everything twice. Maybe use C++20 three way
2005 // comparison instead when it's available.
2006 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
2007 RHS.BaseOps.begin(), RHS.BaseOps.end(),
2008 Compare))
2009 return true;
2010 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
2011 BaseOps.begin(), BaseOps.end(), Compare))
2012 return false;
2013 if (Offset != RHS.Offset)
2014 return Offset < RHS.Offset;
2015 return SU->NodeNum < RHS.SU->NodeNum;
2016 }
2017 };
2018
2019 const TargetInstrInfo *TII;
2020 const TargetRegisterInfo *TRI;
2021 bool IsLoad;
2022 bool ReorderWhileClustering;
2023
2024public:
2025 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
2026 const TargetRegisterInfo *tri, bool IsLoad,
2027 bool ReorderWhileClustering)
2028 : TII(tii), TRI(tri), IsLoad(IsLoad),
2029 ReorderWhileClustering(ReorderWhileClustering) {}
2030
2031 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2032
2033protected:
2034 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
2035 ScheduleDAGInstrs *DAG);
2036 void collectMemOpRecords(std::vector<SUnit> &SUnits,
2037 SmallVectorImpl<MemOpInfo> &MemOpRecords);
2038 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2039 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
2040};
2041
2042class StoreClusterMutation : public BaseMemOpClusterMutation {
2043public:
2044 StoreClusterMutation(const TargetInstrInfo *tii,
2045 const TargetRegisterInfo *tri,
2046 bool ReorderWhileClustering)
2047 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
2048};
2049
2050class LoadClusterMutation : public BaseMemOpClusterMutation {
2051public:
2052 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
2053 bool ReorderWhileClustering)
2054 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
2055};
2056
2057} // end anonymous namespace
2058
2059std::unique_ptr<ScheduleDAGMutation>
2061 const TargetRegisterInfo *TRI,
2062 bool ReorderWhileClustering) {
2063 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
2064 TII, TRI, ReorderWhileClustering)
2065 : nullptr;
2066}
2067
2068std::unique_ptr<ScheduleDAGMutation>
2070 const TargetRegisterInfo *TRI,
2071 bool ReorderWhileClustering) {
2072 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
2073 TII, TRI, ReorderWhileClustering)
2074 : nullptr;
2075}
2076
2077// Sort all the loads/stores first. Then, for each load/store, check the
2078// following loads/stores one by one until the first non-dependent one is
2079// reached, and call the target hook to see if they can be clustered.
2080// If FastCluster is enabled, we assume that all the loads/stores have been
2081// preprocessed and have no dependencies on each other.
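//
// Illustrative sketch (offsets and registers invented, not from a real
// target): given loads sorted as (%base, 0), (%base, 4), (%base, 8),
// (%base, 64), the loop below typically grows one cluster over offsets
// 0 -> 4 -> 8 and starts over at offset 64 once the target hook declines the
// larger gap. Whether any particular pair is accepted is entirely up to
// TII->shouldClusterMemOps().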
2082void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2083 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
2084 ScheduleDAGInstrs *DAG) {
2085 // Keep track of the current cluster length and bytes for each SUnit.
2088
2089 // At this point, the `MemOpRecords` array must hold at least two mem ops. Try
2090 // to cluster the mem ops collected within the `MemOpRecords` array.
2091 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
2092 // The decision to cluster mem ops is based on target-dependent logic.
2093 auto MemOpa = MemOpRecords[Idx];
2094
2095 // Look for the next load/store to cluster with.
2096 unsigned NextIdx = Idx + 1;
2097 for (; NextIdx < End; ++NextIdx)
2098 // Skip if MemOpb has been clustered already or has a dependency with
2099 // MemOpa.
2100 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
2101 (FastCluster ||
2102 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
2103 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
2104 break;
2105 if (NextIdx == End)
2106 continue;
2107
2108 auto MemOpb = MemOpRecords[NextIdx];
2109 unsigned ClusterLength = 2;
2110 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
2111 MemOpb.Width.getValue().getKnownMinValue();
2112 auto It = SUnit2ClusterInfo.find(MemOpa.SU->NodeNum);
2113 if (It != SUnit2ClusterInfo.end()) {
2114 const auto &[Len, Bytes] = It->second;
2115 ClusterLength = Len + 1;
2116 CurrentClusterBytes = Bytes + MemOpb.Width.getValue().getKnownMinValue();
2117 }
2118
2119 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
2120 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
2121 MemOpb.Offset, MemOpb.OffsetIsScalable,
2122 ClusterLength, CurrentClusterBytes))
2123 continue;
2124
2125 SUnit *SUa = MemOpa.SU;
2126 SUnit *SUb = MemOpb.SU;
2127
2128 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
2129 std::swap(SUa, SUb);
2130
2131 // FIXME: Is this check really required?
2132 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
2133 continue;
2134
2135 Clusters.unionSets(SUa, SUb);
2136 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
2137 << SUb->NodeNum << ")\n");
2138 ++NumClustered;
2139
2140 if (IsLoad) {
2141 // Copy successor edges from SUa to SUb. Interleaving computation
2142 // dependent on SUa can prevent load combining due to register reuse.
2143 // Predecessor edges do not need to be copied from SUb to SUa since
2144 // nearby loads should have effectively the same inputs.
2145 for (const SDep &Succ : SUa->Succs) {
2146 if (Succ.getSUnit() == SUb)
2147 continue;
2148 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
2149 << ")\n");
2150 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
2151 }
2152 } else {
2153 // Copy predecessor edges from SUb to SUa so that the SUnits SUb depends
2154 // on are not scheduled in between SUb and SUa. Successor edges do not
2155 // need to be copied from SUa to SUb since nothing will depend on the
2156 // stores.
2157 // Note that we don't need to worry about memory dependencies, because we
2158 // won't try to cluster ops that have any memory dependency.
2159 for (const SDep &Pred : SUb->Preds) {
2160 if (Pred.getSUnit() == SUa)
2161 continue;
2162 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
2163 << ")\n");
2164 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
2165 }
2166 }
2167
2168 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
2169 CurrentClusterBytes};
2170
2171 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
2172 << ", Curr cluster bytes: " << CurrentClusterBytes
2173 << "\n");
2174 }
2175
2176 // Add cluster group information.
2177 // Iterate over all of the equivalence sets.
2178 auto &AllClusters = DAG->getClusters();
2179 for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) {
2180 if (!I->isLeader())
2181 continue;
2182 ClusterInfo Group;
2183 unsigned ClusterIdx = AllClusters.size();
2184 for (SUnit *MemberI : Clusters.members(*I)) {
2185 MemberI->ParentClusterIdx = ClusterIdx;
2186 Group.insert(MemberI);
2187 }
2188 AllClusters.push_back(Group);
2189 }
2190}
2191
2192void BaseMemOpClusterMutation::collectMemOpRecords(
2193 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
2194 for (auto &SU : SUnits) {
2195 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
2196 (!IsLoad && !SU.getInstr()->mayStore()))
2197 continue;
2198
2199 const MachineInstr &MI = *SU.getInstr();
2201 int64_t Offset;
2202 bool OffsetIsScalable;
2205 OffsetIsScalable, Width, TRI)) {
2206 if (!Width.hasValue())
2207 continue;
2208
2209 MemOpRecords.push_back(
2210 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
2211
2212 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
2213 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
2214 << ", Width: " << Width << "\n");
2215 }
2216#ifndef NDEBUG
2217 for (const auto *Op : BaseOps)
2218 assert(Op);
2219#endif
2220 }
2221}
2222
2223bool BaseMemOpClusterMutation::groupMemOps(
2226 bool FastCluster =
2228 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
2229
2230 for (const auto &MemOp : MemOps) {
2231 unsigned ChainPredID = DAG->SUnits.size();
2232 if (FastCluster) {
2233 for (const SDep &Pred : MemOp.SU->Preds) {
2234 // We only want to cluster mem ops that have the same ctrl (non-data)
2235 // pred so that they don't have a ctrl dependency on each other. But for
2236 // store instrs, we can still cluster them if the pred is a load instr.
2237 if ((Pred.isCtrl() &&
2238 (IsLoad ||
2239 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
2240 !Pred.isArtificial()) {
2241 ChainPredID = Pred.getSUnit()->NodeNum;
2242 break;
2243 }
2244 }
2245 } else
2246 ChainPredID = 0;
2247
2248 Groups[ChainPredID].push_back(MemOp);
2249 }
2250 return FastCluster;
2251}
2252
2253/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2254void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2255 // Collect all the clusterable loads/stores
2256 SmallVector<MemOpInfo, 32> MemOpRecords;
2257 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2258
2259 if (MemOpRecords.size() < 2)
2260 return;
2261
2262 // If the DAG is too complex, use a heuristic to put independent loads/stores
2263 // into the same group, to avoid a compile-time blow up.
2264 // Note that some fusion pairs may be lost as a result.
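  // As a rough illustration (numbers invented): 200 clusterable mem ops in a
  // region of 1000 SUnits gives 200 * 1000 / 1000 = 200, which exceeds a small
  // FastClusterThreshold, so groupMemOps falls back to the fast path and
  // buckets ops by their ctrl predecessor instead of querying DAG
  // reachability for every candidate pair.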
2266 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2267
2268 for (auto &Group : Groups) {
2269 // Sort the loads/stores so that we can stop clustering as early as
2270 // possible.
2271 llvm::sort(Group.second);
2272
2273 // Try to cluster all the neighboring loads/stores.
2274 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2275 }
2276}
2277
2278//===----------------------------------------------------------------------===//
2279// CopyConstrain - DAG post-processing to encourage copy elimination.
2280//===----------------------------------------------------------------------===//
2281
2282namespace {
2283
2284/// Post-process the DAG to create weak edges from all uses of a copy to
2285/// the one use that defines the copy's source vreg, most likely an induction
2286/// variable increment.
2287class CopyConstrain : public ScheduleDAGMutation {
2288 // Transient state.
2289 SlotIndex RegionBeginIdx;
2290
2291 // RegionEndIdx is the slot index of the last non-debug instruction in the
2292 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2293 SlotIndex RegionEndIdx;
2294
2295public:
2296 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2297
2298 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2299
2300protected:
2301 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2302};
2303
2304} // end anonymous namespace
2305
2306std::unique_ptr<ScheduleDAGMutation>
2308 const TargetRegisterInfo *TRI) {
2309 return std::make_unique<CopyConstrain>(TII, TRI);
2310}
2311
2312/// constrainLocalCopy handles two possibilities:
2313/// 1) Local src:
2314/// I0: = dst
2315/// I1: src = ...
2316/// I2: = dst
2317/// I3: dst = src (copy)
2318/// (create pred->succ edges I0->I1, I2->I1)
2319///
2320/// 2) Local copy:
2321/// I0: dst = src (copy)
2322/// I1: = dst
2323/// I2: src = ...
2324/// I3: = dst
2325/// (create pred->succ edges I1->I2, I3->I2)
2326///
2327/// Although the MachineScheduler is currently constrained to single blocks,
2328/// this algorithm should handle extended blocks. An EBB is a set of
2329/// contiguously numbered blocks such that the previous block in the EBB is
2330/// always the single predecessor.
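///
/// A purely illustrative instance of case 1 is the copy left behind for an
/// induction variable after phi elimination (names invented):
///   %iv.next = ADD %iv, 1     ; I1: src = ...
///   ...      = USE %iv        ; I2: = dst
///   %iv      = COPY %iv.next  ; I3: dst = src (copy)
/// The weak edges nudge the remaining uses of %iv ahead of the ADD, so the
/// two vregs overlap only at the COPY and the register allocator is more
/// likely to coalesce it away.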
2331void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2332 LiveIntervals *LIS = DAG->getLIS();
2333 MachineInstr *Copy = CopySU->getInstr();
2334
2335 // Check for pure vreg copies.
2336 const MachineOperand &SrcOp = Copy->getOperand(1);
2337 Register SrcReg = SrcOp.getReg();
2338 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2339 return;
2340
2341 const MachineOperand &DstOp = Copy->getOperand(0);
2342 Register DstReg = DstOp.getReg();
2343 if (!DstReg.isVirtual() || DstOp.isDead())
2344 return;
2345
2346 // Check if either the dest or source is local. If it's live across a back
2347 // edge, it's not local. Note that if both vregs are live across the back
2348 // edge, we cannot successfully constrain the copy without cyclic scheduling.
2349 // If both the copy's source and dest are local live intervals, then we
2350 // should treat the dest as the global for the purpose of adding
2351 // constraints. This adds edges from source's other uses to the copy.
2352 unsigned LocalReg = SrcReg;
2353 unsigned GlobalReg = DstReg;
2354 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2355 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2356 LocalReg = DstReg;
2357 GlobalReg = SrcReg;
2358 LocalLI = &LIS->getInterval(LocalReg);
2359 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2360 return;
2361 }
2362 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2363
2364 // Find the global segment after the start of the local LI.
2365 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2366 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2367 // local live range. We could create edges from other global uses to the local
2368 // start, but the coalescer should have already eliminated these cases, so
2369 // don't bother dealing with them.
2370 if (GlobalSegment == GlobalLI->end())
2371 return;
2372
2373 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2374 // returned the next global segment. But if GlobalSegment overlaps with
2375 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2376 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2377 if (GlobalSegment->contains(LocalLI->beginIndex()))
2378 ++GlobalSegment;
2379
2380 if (GlobalSegment == GlobalLI->end())
2381 return;
2382
2383 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2384 if (GlobalSegment != GlobalLI->begin()) {
2385 // Two address defs have no hole.
2386 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2387 GlobalSegment->start)) {
2388 return;
2389 }
2390 // If the prior global segment may be defined by the same two-address
2391 // instruction that also defines LocalLI, then we can't make a hole here.
2392 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2393 LocalLI->beginIndex())) {
2394 return;
2395 }
2396 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2397 // it would be a disconnected component in the live range.
2398 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2399 "Disconnected LRG within the scheduling region.");
2400 }
2401 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2402 if (!GlobalDef)
2403 return;
2404
2405 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2406 if (!GlobalSU)
2407 return;
2408
2409 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2410 // constraining the uses of the last local def to precede GlobalDef.
2411 SmallVector<SUnit*,8> LocalUses;
2412 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2413 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2414 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2415 for (const SDep &Succ : LastLocalSU->Succs) {
2416 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2417 continue;
2418 if (Succ.getSUnit() == GlobalSU)
2419 continue;
2420 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2421 return;
2422 LocalUses.push_back(Succ.getSUnit());
2423 }
2424 // Open the top of the GlobalLI hole by constraining any earlier global uses
2425 // to precede the start of LocalLI.
2426 SmallVector<SUnit*,8> GlobalUses;
2427 MachineInstr *FirstLocalDef =
2428 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2429 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2430 for (const SDep &Pred : GlobalSU->Preds) {
2431 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2432 continue;
2433 if (Pred.getSUnit() == FirstLocalSU)
2434 continue;
2435 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2436 return;
2437 GlobalUses.push_back(Pred.getSUnit());
2438 }
2439 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2440 // Add the weak edges.
2441 for (SUnit *LU : LocalUses) {
2442 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2443 << GlobalSU->NodeNum << ")\n");
2444 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2445 }
2446 for (SUnit *GU : GlobalUses) {
2447 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2448 << FirstLocalSU->NodeNum << ")\n");
2449 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2450 }
2451}
2452
2453/// Callback from DAG postProcessing to create weak edges to encourage
2454/// copy elimination.
2455void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2456 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2457 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2458
2459 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2460 if (FirstPos == DAG->end())
2461 return;
2462 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2463 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2464 *priorNonDebug(DAG->end(), DAG->begin()));
2465
2466 for (SUnit &SU : DAG->SUnits) {
2467 if (!SU.getInstr()->isCopy())
2468 continue;
2469
2470 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2471 }
2472}
2473
2474//===----------------------------------------------------------------------===//
2475// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2476// and possibly other custom schedulers.
2477//===----------------------------------------------------------------------===//
2478
2479static const unsigned InvalidCycle = ~0U;
2480
2482
2483/// Given a Count of resource usage and a Latency value, return true if a
2484/// SchedBoundary becomes resource limited.
2485/// If we are checking after scheduling a node, we should return true when
2486/// we just reach the resource limit.
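///
/// For illustration (numbers invented): with a latency factor of 2, a
/// scheduled latency of 5 cycles and a scaled resource count of 12, the slack
/// is 12 - 5 * 2 = 2. After scheduling a node (AfterSchedNode == true) the
/// check is 2 >= 2 and the zone is reported resource limited; before
/// scheduling it would require strictly more than 2.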
2487static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2488 unsigned Latency, bool AfterSchedNode) {
2489 int ResCntFactor = (int)(Count - (Latency * LFactor));
2490 if (AfterSchedNode)
2491 return ResCntFactor >= (int)LFactor;
2492 else
2493 return ResCntFactor > (int)LFactor;
2494}
2495
2497 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2498 // Destroying and reconstructing it is very expensive though. So keep
2499 // invalid, placeholder HazardRecs.
2500 if (HazardRec && HazardRec->isEnabled()) {
2501 delete HazardRec;
2502 HazardRec = nullptr;
2503 }
2504 Available.clear();
2505 Pending.clear();
2506 CheckPending = false;
2507 CurrCycle = 0;
2508 CurrMOps = 0;
2509 MinReadyCycle = std::numeric_limits<unsigned>::max();
2510 ExpectedLatency = 0;
2511 DependentLatency = 0;
2512 RetiredMOps = 0;
2513 MaxExecutedResCount = 0;
2514 ZoneCritResIdx = 0;
2515 IsResourceLimited = false;
2516 ReservedCycles.clear();
2517 ReservedResourceSegments.clear();
2518 ReservedCyclesIndex.clear();
2519 ResourceGroupSubUnitMasks.clear();
2520#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2521 // Track the maximum number of stall cycles that could arise either from the
2522 // latency of a DAG edge or the number of cycles that a processor resource is
2523 // reserved (SchedBoundary::ReservedCycles).
2524 MaxObservedStall = 0;
2525#endif
2526 // Reserve a zero-count for invalid CritResIdx.
2527 ExecutedResCounts.resize(1);
2528 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2529}
2530
2532init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2533 reset();
2534 if (!SchedModel->hasInstrSchedModel())
2535 return;
2536 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2537 for (SUnit &SU : DAG->SUnits) {
2538 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2539 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2540 * SchedModel->getMicroOpFactor();
2542 PI = SchedModel->getWriteProcResBegin(SC),
2543 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2544 unsigned PIdx = PI->ProcResourceIdx;
2545 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2546 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2547 RemainingCounts[PIdx] +=
2548 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2549 }
2550 }
2551}
2552
2554init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2555 reset();
2556 DAG = dag;
2557 SchedModel = smodel;
2558 Rem = rem;
2559 if (SchedModel->hasInstrSchedModel()) {
2560 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2561 ReservedCyclesIndex.resize(ResourceCount);
2562 ExecutedResCounts.resize(ResourceCount);
2563 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2564 unsigned NumUnits = 0;
2565
2566 for (unsigned i = 0; i < ResourceCount; ++i) {
2567 ReservedCyclesIndex[i] = NumUnits;
2568 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2569 if (isReservedGroup(i)) {
2570 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2571 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2572 U != UE; ++U)
2573 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2574 }
2575 }
2576
2577 ReservedCycles.resize(NumUnits, InvalidCycle);
2578 }
2579}
2580
2581/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2582/// these "soft stalls" differently than the hard stall cycles based on CPU
2583/// resources and computed by checkHazard(). A fully in-order model
2584/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2585/// available for scheduling until they are ready. However, a weaker in-order
2586/// model may use this for heuristics. For example, if a processor has in-order
2587/// behavior when reading certain resources, this may come into play.
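///
/// As a sketch (cycles invented): an unbuffered SU whose BotReadyCycle is 7
/// while the bottom zone is at CurrCycle 4 reports 3 soft-stall cycles; a
/// buffered SU always reports 0 here, whatever its ready cycle.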
2589 if (!SU->isUnbuffered)
2590 return 0;
2591
2592 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2593 if (ReadyCycle > CurrCycle)
2594 return ReadyCycle - CurrCycle;
2595 return 0;
2596}
2597
2598/// Compute the next cycle at which the given processor resource unit
2599/// can be scheduled.
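/// For illustration (values invented): when resource intervals are not
/// enabled, a unit whose recorded ReservedCycles entry is 2, with CurrCycle 3
/// and ReleaseAtCycle 4, is next available at max(3, 2 + 4) = 6 in a
/// bottom-up zone, while a top-down zone simply reports the recorded cycle 2.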
2601 unsigned ReleaseAtCycle,
2602 unsigned AcquireAtCycle) {
2603 if (SchedModel && SchedModel->enableIntervals()) {
2604 if (isTop())
2605 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2606 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2607
2608 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2609 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2610 }
2611
2612 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2613 // If this resource has never been used, it is available at the current cycle.
2614 if (NextUnreserved == InvalidCycle)
2615 return CurrCycle;
2616 // For bottom-up scheduling add the cycles needed for the current operation.
2617 if (!isTop())
2618 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2619 return NextUnreserved;
2620}
2621
2622/// Compute the next cycle at which the given processor resource can be
2623/// scheduled. Returns the next cycle and the index of the processor resource
2624/// instance in the reserved cycles vector.
2625std::pair<unsigned, unsigned>
2627 unsigned ReleaseAtCycle,
2628 unsigned AcquireAtCycle) {
2630 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2632 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2633 }
2634 unsigned MinNextUnreserved = InvalidCycle;
2635 unsigned InstanceIdx = 0;
2636 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2637 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2638 assert(NumberOfInstances > 0 &&
2639 "Cannot have zero instances of a ProcResource");
2640
2641 if (isReservedGroup(PIdx)) {
2642 // If any subunits are used by the instruction, report that the
2643 // subunits of the resource group are available at the first cycle
2644 // in which the unit is available, effectively removing the group
2645 // record from hazarding and basing the hazarding decisions on the
2646 // subunit records. Otherwise, choose the first available instance
2647 // from among the subunits. Specifications which assign cycles to
2648 // both the subunits and the group or which use an unbuffered
2649 // group with buffered subunits will appear to schedule
2650 // strangely. In the first case, the additional cycles for the
2651 // group will be ignored. In the second, the group will be
2652 // ignored entirely.
2653 for (const MCWriteProcResEntry &PE :
2654 make_range(SchedModel->getWriteProcResBegin(SC),
2655 SchedModel->getWriteProcResEnd(SC)))
2656 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2657 return std::make_pair(getNextResourceCycleByInstance(
2658 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2659 StartIndex);
2660
2661 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2662 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2663 unsigned NextUnreserved, NextInstanceIdx;
2664 std::tie(NextUnreserved, NextInstanceIdx) =
2665 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2666 if (MinNextUnreserved > NextUnreserved) {
2667 InstanceIdx = NextInstanceIdx;
2668 MinNextUnreserved = NextUnreserved;
2669 }
2670 }
2671 return std::make_pair(MinNextUnreserved, InstanceIdx);
2672 }
2673
2674 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2675 ++I) {
2676 unsigned NextUnreserved =
2677 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2679 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2680 << NextUnreserved << "c\n");
2681 if (MinNextUnreserved > NextUnreserved) {
2682 InstanceIdx = I;
2683 MinNextUnreserved = NextUnreserved;
2684 }
2685 }
2687 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2688 << "[" << InstanceIdx - StartIndex << "]"
2689 << " available @" << MinNextUnreserved << "c"
2690 << "\n");
2691 return std::make_pair(MinNextUnreserved, InstanceIdx);
2692}
2693
2694/// Does this SU have a hazard within the current instruction group.
2695///
2696/// The scheduler supports two modes of hazard recognition. The first is the
2697/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2698/// supports highly complicated in-order reservation tables
2699/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2700///
2701/// The second is a streamlined mechanism that checks for hazards based on
2702/// simple counters that the scheduler itself maintains. It explicitly checks
2703/// for instruction dispatch limitations, including the number of micro-ops that
2704/// can dispatch per cycle.
2705///
2706/// TODO: Also check whether the SU must start a new group.
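///
/// For example (hypothetical machine): with an issue width of 4 and CurrMOps
/// already at 3, an SU that expands to 2 micro-ops is a hazard because
/// 3 + 2 > 4; it is typically deferred to the Pending queue until the cycle
/// advances.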
2708 if (HazardRec->isEnabled()
2709 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2711 << "hazard: SU(" << SU->NodeNum << ") reported by HazardRec\n");
2712 return true;
2713 }
2714
2715 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2716 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2717 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") uops="
2718 << uops << ", CurrMOps = " << CurrMOps << ", "
2719 << "CurrMOps + uops > issue width of "
2720 << SchedModel->getIssueWidth() << "\n");
2721 return true;
2722 }
2723
2724 if (CurrMOps > 0 &&
2725 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2726 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2727 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") must "
2728 << (isTop() ? "begin" : "end") << " group\n");
2729 return true;
2730 }
2731
2732 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2733 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2734 for (const MCWriteProcResEntry &PE :
2735 make_range(SchedModel->getWriteProcResBegin(SC),
2736 SchedModel->getWriteProcResEnd(SC))) {
2737 unsigned ResIdx = PE.ProcResourceIdx;
2738 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2739 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2740 unsigned NRCycle, InstanceIdx;
2741 std::tie(NRCycle, InstanceIdx) =
2742 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2743 if (NRCycle > CurrCycle) {
2744#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2745 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2746#endif
2748 << "hazard: SU(" << SU->NodeNum << ") "
2749 << SchedModel->getResourceName(ResIdx) << '['
2750 << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' << "="
2751 << NRCycle << "c, is later than "
2752 << "CurrCycle = " << CurrCycle << "c\n");
2753 return true;
2754 }
2755 }
2756 }
2757 return false;
2758}
2759
2760// Find the unscheduled node in ReadySUs with the highest latency.
2763 SUnit *LateSU = nullptr;
2764 unsigned RemLatency = 0;
2765 for (SUnit *SU : ReadySUs) {
2766 unsigned L = getUnscheduledLatency(SU);
2767 if (L > RemLatency) {
2768 RemLatency = L;
2769 LateSU = SU;
2770 }
2771 }
2772 if (LateSU) {
2773 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2774 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2775 }
2776 return RemLatency;
2777}
2778
2779// Count resources in this zone and the remaining unscheduled
2780// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2781// resource index, or zero if the zone is issue limited.
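// Sketch with invented numbers: if the scaled micro-op count (remaining plus
// retired) is 40 and some resource's executed-plus-remaining scaled count is
// 55, that resource becomes OtherCritIdx and 55 is returned; if nothing beats
// the micro-op count, OtherCritIdx stays 0 and the other zone is considered
// issue limited.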
2783getOtherResourceCount(unsigned &OtherCritIdx) {
2784 OtherCritIdx = 0;
2785 if (!SchedModel->hasInstrSchedModel())
2786 return 0;
2787
2788 unsigned OtherCritCount = Rem->RemIssueCount
2789 + (RetiredMOps * SchedModel->getMicroOpFactor());
2790 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2791 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2792 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2793 PIdx != PEnd; ++PIdx) {
2794 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2795 if (OtherCount > OtherCritCount) {
2796 OtherCritCount = OtherCount;
2797 OtherCritIdx = PIdx;
2798 }
2799 }
2800 if (OtherCritIdx) {
2801 LLVM_DEBUG(
2802 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2803 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2804 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2805 }
2806 return OtherCritCount;
2807}
2808
2809void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2810 unsigned Idx) {
2811 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2812
2813#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2814 // ReadyCycle has been bumped up to the CurrCycle when this node was
2815 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2816 // scheduling, so may now be greater than ReadyCycle.
2817 if (ReadyCycle > CurrCycle)
2818 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2819#endif
2820
2821 if (ReadyCycle < MinReadyCycle)
2822 MinReadyCycle = ReadyCycle;
2823
2824 // Check for interlocks first. For the purpose of other heuristics, an
2825 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2826 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2827 bool HazardDetected = !IsBuffered && ReadyCycle > CurrCycle;
2828 if (HazardDetected)
2829 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum
2830 << ") ReadyCycle = " << ReadyCycle
2831 << " is later than CurrCycle = " << CurrCycle
2832 << " on an unbuffered resource" << "\n");
2833 else
2834 HazardDetected = checkHazard(SU);
2835
2836 if (!HazardDetected && Available.size() >= ReadyListLimit) {
2837 HazardDetected = true;
2838 LLVM_DEBUG(dbgs().indent(2) << "hazard: Available Q is full (size: "
2839 << Available.size() << ")\n");
2840 }
2841
2842 if (!HazardDetected) {
2843 Available.push(SU);
2845 << "Move SU(" << SU->NodeNum << ") into Available Q\n");
2846
2847 if (InPQueue)
2848 Pending.remove(Pending.begin() + Idx);
2849 return;
2850 }
2851
2852 if (!InPQueue)
2853 Pending.push(SU);
2854}
2855
2856/// Move the boundary of scheduled code by one cycle.
2857void SchedBoundary::bumpCycle(unsigned NextCycle) {
2858 if (SchedModel->getMicroOpBufferSize() == 0) {
2859 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2860 "MinReadyCycle uninitialized");
2861 if (MinReadyCycle > NextCycle)
2862 NextCycle = MinReadyCycle;
2863 }
2864 // Update the current micro-ops, which will issue in the next cycle.
2865 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2866 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2867
2868 // Decrement DependentLatency based on the next cycle.
2869 if ((NextCycle - CurrCycle) > DependentLatency)
2870 DependentLatency = 0;
2871 else
2872 DependentLatency -= (NextCycle - CurrCycle);
2873
2874 if (!HazardRec->isEnabled()) {
2875 // Bypass HazardRec virtual calls.
2876 CurrCycle = NextCycle;
2877 } else {
2878 // Bypass getHazardType calls in case of long latency.
2879 for (; CurrCycle != NextCycle; ++CurrCycle) {
2880 if (isTop())
2881 HazardRec->AdvanceCycle();
2882 else
2883 HazardRec->RecedeCycle();
2884 }
2885 }
2886 CheckPending = true;
2887 IsResourceLimited =
2888 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2889 getScheduledLatency(), true);
2890
2891 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2892 << '\n');
2893}
2894
2895void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2896 ExecutedResCounts[PIdx] += Count;
2897 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2898 MaxExecutedResCount = ExecutedResCounts[PIdx];
2899}
2900
2901/// Add the given processor resource to this scheduled zone.
2902///
2903/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2904/// cycles during which this resource is released.
2905///
2906/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2907/// cycles at which the resource is acquired after issue (assuming no stalls).
2908///
2909/// \return the next cycle at which the instruction may execute without
2910/// oversubscribing resources.
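///
/// For instance (made-up resource model): a write occupying the resource from
/// AcquireAtCycle 1 to ReleaseAtCycle 4 on a resource with factor 2
/// contributes 2 * (4 - 1) = 6 scaled units, which is charged against
/// Rem->RemainingCounts and may promote this resource to ZoneCritResIdx.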
2911unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2912 unsigned ReleaseAtCycle,
2913 unsigned NextCycle,
2914 unsigned AcquireAtCycle) {
2915 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2916 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2917 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2918 << ReleaseAtCycle << "x" << Factor << "u\n");
2919
2920 // Update executed resource counts.
2922 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2923 Rem->RemainingCounts[PIdx] -= Count;
2924
2925 // Check if this resource exceeds the current critical resource. If so, it
2926 // becomes the critical resource.
2927 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2928 ZoneCritResIdx = PIdx;
2929 LLVM_DEBUG(dbgs() << " *** Critical resource "
2930 << SchedModel->getResourceName(PIdx) << ": "
2931 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2932 << "c\n");
2933 }
2934 // For reserved resources, record the highest cycle using the resource.
2935 unsigned NextAvailable, InstanceIdx;
2936 std::tie(NextAvailable, InstanceIdx) =
2937 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2938 if (NextAvailable > CurrCycle) {
2939 LLVM_DEBUG(dbgs() << " Resource conflict: "
2940 << SchedModel->getResourceName(PIdx)
2941 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2942 << " reserved until @" << NextAvailable << "\n");
2943 }
2944 return NextAvailable;
2945}
2946
2947/// Move the boundary of scheduled code by one SUnit.
2949 // Update the reservation table.
2950 if (HazardRec->isEnabled()) {
2951 if (!isTop() && SU->isCall) {
2952 // Calls are scheduled with their preceding instructions. For bottom-up
2953 // scheduling, clear the pipeline state before emitting.
2954 HazardRec->Reset();
2955 }
2956 HazardRec->EmitInstruction(SU);
2957 // Scheduling an instruction may have made pending instructions available.
2958 CheckPending = true;
2959 }
2960 // checkHazard should prevent scheduling multiple instructions per cycle that
2961 // exceed the issue width.
2962 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2963 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2964 assert(
2965 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2966 "Cannot schedule this instruction's MicroOps in the current cycle.");
2967
2968 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2969 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2970
2971 unsigned NextCycle = CurrCycle;
2972 switch (SchedModel->getMicroOpBufferSize()) {
2973 case 0:
2974 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2975 break;
2976 case 1:
2977 if (ReadyCycle > NextCycle) {
2978 NextCycle = ReadyCycle;
2979 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2980 }
2981 break;
2982 default:
2983 // We don't currently model the OOO reorder buffer, so consider all
2984 // scheduled MOps to be "retired". We do loosely model in-order resource
2985 // latency. If this instruction uses an in-order resource, account for any
2986 // likely stall cycles.
2987 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2988 NextCycle = ReadyCycle;
2989 break;
2990 }
2991 RetiredMOps += IncMOps;
2992
2993 // Update resource counts and critical resource.
2994 if (SchedModel->hasInstrSchedModel()) {
2995 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2996 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2997 Rem->RemIssueCount -= DecRemIssue;
2998 if (ZoneCritResIdx) {
2999 // Scale scheduled micro-ops for comparing with the critical resource.
3000 unsigned ScaledMOps =
3001 RetiredMOps * SchedModel->getMicroOpFactor();
3002
3003 // If scaled micro-ops are now more than the previous critical resource by
3004 // a full cycle, then micro-ops issue becomes critical.
3005 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
3006 >= (int)SchedModel->getLatencyFactor()) {
3007 ZoneCritResIdx = 0;
3008 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
3009 << ScaledMOps / SchedModel->getLatencyFactor()
3010 << "c\n");
3011 }
3012 }
3014 PI = SchedModel->getWriteProcResBegin(SC),
3015 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3016 unsigned RCycle =
3017 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
3018 PI->AcquireAtCycle);
3019 if (RCycle > NextCycle)
3020 NextCycle = RCycle;
3021 }
3022 if (SU->hasReservedResource) {
3023 // For reserved resources, record the highest cycle using the resource.
3024 // For top-down scheduling, this is the cycle in which we schedule this
3025 // instruction plus the number of cycles the operation reserves the
3026 // resource. For bottom-up, it is simply the instruction's cycle.
3028 PI = SchedModel->getWriteProcResBegin(SC),
3029 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3030 unsigned PIdx = PI->ProcResourceIdx;
3031 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
3032
3033 if (SchedModel && SchedModel->enableIntervals()) {
3034 unsigned ReservedUntil, InstanceIdx;
3035 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3036 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3037 if (isTop()) {
3038 ReservedResourceSegments[InstanceIdx].add(
3040 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3042 } else {
3043 ReservedResourceSegments[InstanceIdx].add(
3045 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3047 }
3048 } else {
3049
3050 unsigned ReservedUntil, InstanceIdx;
3051 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3052 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3053 if (isTop()) {
3054 ReservedCycles[InstanceIdx] =
3055 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
3056 } else
3057 ReservedCycles[InstanceIdx] = NextCycle;
3058 }
3059 }
3060 }
3061 }
3062 }
3063 // Update ExpectedLatency and DependentLatency.
3064 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
3065 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
3066 if (SU->getDepth() > TopLatency) {
3067 TopLatency = SU->getDepth();
3068 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
3069 << SU->NodeNum << ") " << TopLatency << "c\n");
3070 }
3071 if (SU->getHeight() > BotLatency) {
3072 BotLatency = SU->getHeight();
3073 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
3074 << SU->NodeNum << ") " << BotLatency << "c\n");
3075 }
3076 // If we stall for any reason, bump the cycle.
3077 if (NextCycle > CurrCycle)
3078 bumpCycle(NextCycle);
3079 else
3080 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
3081 // resource limited. If a stall occurred, bumpCycle does this.
3082 IsResourceLimited =
3083 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
3084 getScheduledLatency(), true);
3085
3086 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
3087 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
3088 // one cycle. Since we commonly reach the max MOps here, opportunistically
3089 // bump the cycle to avoid uselessly checking everything in the readyQ.
3090 CurrMOps += IncMOps;
3091
3092 // Bump the cycle count for issue group constraints.
3093 // This must be done after NextCycle has been adjusted for all other stalls.
3094 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
3095 // currCycle to X.
3096 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
3097 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
3098 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
3099 << " group\n");
3100 bumpCycle(++NextCycle);
3101 }
3102
3103 while (CurrMOps >= SchedModel->getIssueWidth()) {
3104 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
3105 << CurrCycle << '\n');
3106 bumpCycle(++NextCycle);
3107 }
3109}
3110
3111/// Release pending ready nodes into the available queue. This makes them
3112/// visible to heuristics.
3114 // If the available queue is empty, it is safe to reset MinReadyCycle.
3115 if (Available.empty())
3116 MinReadyCycle = std::numeric_limits<unsigned>::max();
3117
3118 // Check to see if any of the pending instructions are ready to issue. If
3119 // so, add them to the available queue.
3120 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
3121 SUnit *SU = *(Pending.begin() + I);
3122 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
3123
3124 LLVM_DEBUG(dbgs() << "Checking pending node SU(" << SU->NodeNum << ")\n");
3125
3126 if (ReadyCycle < MinReadyCycle)
3127 MinReadyCycle = ReadyCycle;
3128
3129 if (Available.size() >= ReadyListLimit)
3130 break;
3131
3132 releaseNode(SU, ReadyCycle, true, I);
3133 if (E != Pending.size()) {
3134 --I;
3135 --E;
3136 }
3137 }
3138 CheckPending = false;
3139}
3140
3141/// Remove SU from the ready set for this boundary.
3143 if (Available.isInQueue(SU))
3144 Available.remove(Available.find(SU));
3145 else {
3146 assert(Pending.isInQueue(SU) && "bad ready count");
3147 Pending.remove(Pending.find(SU));
3148 }
3149}
3150
3151/// If this queue only has one ready candidate, return it. As a side effect,
3152/// defer any nodes that now hit a hazard, and advance the cycle until at least
3153/// one node is ready. If multiple instructions are ready, return NULL.
3155 if (CheckPending)
3157
3158 // Defer any ready instrs that now have a hazard.
3159 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
3160 if (checkHazard(*I)) {
3161 Pending.push(*I);
3162 I = Available.remove(I);
3163 continue;
3164 }
3165 ++I;
3166 }
3167 for (unsigned i = 0; Available.empty(); ++i) {
3168// FIXME: Re-enable assert once PR20057 is resolved.
3169// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
3170// "permanent hazard");
3171 (void)i;
3172 bumpCycle(CurrCycle + 1);
3174 }
3175
3176 LLVM_DEBUG(Pending.dump());
3177 LLVM_DEBUG(Available.dump());
3178
3179 if (Available.size() == 1)
3180 return *Available.begin();
3181 return nullptr;
3182}
3183
3184#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3185
3186/// Dump the content of the \ref ReservedCycles vector for the
3187/// resources that are used in the basic block.
3188///
3190 if (!SchedModel->hasInstrSchedModel())
3191 return;
3192
3193 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
3194 unsigned StartIdx = 0;
3195
3196 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3197 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
3198 std::string ResName = SchedModel->getResourceName(ResIdx);
3199 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3200 dbgs() << ResName << "(" << UnitIdx << ") = ";
3201 if (SchedModel && SchedModel->enableIntervals()) {
3202 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3203 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3204 else
3205 dbgs() << "{ }\n";
3206 } else
3207 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
3208 }
3209 StartIdx += NumUnits;
3210 }
3211}
3212
3213// This is useful information to dump after bumpNode.
3214// Note that the Queue contents are more useful before pickNodeFromQueue.
3216 unsigned ResFactor;
3217 unsigned ResCount;
3218 if (ZoneCritResIdx) {
3219 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
3220 ResCount = getResourceCount(ZoneCritResIdx);
3221 } else {
3222 ResFactor = SchedModel->getMicroOpFactor();
3223 ResCount = RetiredMOps * ResFactor;
3224 }
3225 unsigned LFactor = SchedModel->getLatencyFactor();
3226 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
3227 << " Retired: " << RetiredMOps;
3228 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
3229 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
3230 << ResCount / ResFactor << " "
3231 << SchedModel->getResourceName(ZoneCritResIdx)
3232 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
3233 << (IsResourceLimited ? " - Resource" : " - Latency")
3234 << " limited.\n";
3237}
3238#endif
3239
3240//===----------------------------------------------------------------------===//
3241// GenericScheduler - Generic implementation of MachineSchedStrategy.
3242//===----------------------------------------------------------------------===//
3243
3247 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
3248 return;
3249
3250 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
3252 PI = SchedModel->getWriteProcResBegin(SC),
3253 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3254 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
3255 ResDelta.CritResources += PI->ReleaseAtCycle;
3256 if (PI->ProcResourceIdx == Policy.DemandResIdx)
3257 ResDelta.DemandedResources += PI->ReleaseAtCycle;
3258 }
3259}
3260
3261/// Compute remaining latency. We need this both to determine whether the
3262/// overall schedule has become latency-limited and whether the instructions
3263/// outside this zone are resource or latency limited.
3264///
3265/// The "dependent" latency is updated incrementally during scheduling as the
3266/// max height/depth of scheduled nodes minus the cycles since it was
3267/// scheduled:
3268/// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
3269///
3270/// The "independent" latency is the max ready queue depth:
3271/// ILat = max N.depth for N in Available|Pending
3272///
3273/// RemainingLatency is the greater of independent and dependent latency.
3274///
3275/// These computations are expensive, especially in DAGs with many edges, so
3276/// only do them if necessary.
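///
/// Worked example (values invented): with a dependent latency of 7, a largest
/// N.depth of 9 over Available and 11 over Pending, RemainingLatency is
/// max(7, 9, 11) = 11.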
3277static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3278 unsigned RemLatency = CurrZone.getDependentLatency();
3279 RemLatency = std::max(RemLatency,
3280 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3281 RemLatency = std::max(RemLatency,
3282 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3283 return RemLatency;
3284}
3285
3286/// Returns true if the current cycle plus remaining latency is greater than
3287/// the critical path in the scheduling region.
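/// E.g. (hypothetical region): with CurrCycle 10, a remaining latency of 8 and
/// a critical path of 15, 10 + 8 > 15 and latency should be reduced; with a
/// critical path of 20 it should not.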
3288bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3289 SchedBoundary &CurrZone,
3290 bool ComputeRemLatency,
3291 unsigned &RemLatency) const {
3292 // The current cycle is already greater than the critical path, so we are
3293 // already latency limited and don't need to compute the remaining latency.
3294 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3295 return true;
3296
3297 // If we haven't scheduled anything yet, then we aren't latency limited.
3298 if (CurrZone.getCurrCycle() == 0)
3299 return false;
3300
3301 if (ComputeRemLatency)
3302 RemLatency = computeRemLatency(CurrZone);
3303
3304 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3305}
3306
3307/// Set the CandPolicy for a scheduling zone, given the current resources and
3308/// latencies inside and outside the zone.
3310 SchedBoundary &CurrZone,
3311 SchedBoundary *OtherZone) {
3312 // Apply preemptive heuristics based on the total latency and resources
3313 // inside and outside this zone. Potential stalls should be considered before
3314 // following this policy.
3315
3316 // Compute the critical resource outside the zone.
3317 unsigned OtherCritIdx = 0;
3318 unsigned OtherCount =
3319 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3320
3321 bool OtherResLimited = false;
3322 unsigned RemLatency = 0;
3323 bool RemLatencyComputed = false;
3324 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3325 RemLatency = computeRemLatency(CurrZone);
3326 RemLatencyComputed = true;
3327 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3328 OtherCount, RemLatency, false);
3329 }
3330
3331 // Schedule aggressively for latency in PostRA mode. We don't check for
3332 // acyclic latency during PostRA, and highly out-of-order processors will
3333 // skip PostRA scheduling.
3334 if (!OtherResLimited &&
3335 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3336 RemLatency))) {
3337 Policy.ReduceLatency |= true;
3338 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3339 << " RemainingLatency " << RemLatency << " + "
3340 << CurrZone.getCurrCycle() << "c > CritPath "
3341 << Rem.CriticalPath << "\n");
3342 }
3343 // If the same resource is limiting inside and outside the zone, do nothing.
3344 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3345 return;
3346
3347 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3348 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3349 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3350 } if (OtherResLimited) dbgs()
3351 << " RemainingLimit: "
3352 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3353 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3354 << " Latency limited both directions.\n");
3355
3356 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3357 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3358
3359 if (OtherResLimited)
3360 Policy.DemandResIdx = OtherCritIdx;
3361}
3362
3363#ifndef NDEBUG
3364const char *GenericSchedulerBase::getReasonStr(
3365 GenericSchedulerBase::CandReason Reason) {
3366 // clang-format off
3367 switch (Reason) {
3368 case NoCand: return "NOCAND ";
3369 case Only1: return "ONLY1 ";
3370 case PhysReg: return "PHYS-REG ";
3371 case RegExcess: return "REG-EXCESS";
3372 case RegCritical: return "REG-CRIT ";
3373 case Stall: return "STALL ";
3374 case Cluster: return "CLUSTER ";
3375 case Weak: return "WEAK ";
3376 case RegMax: return "REG-MAX ";
3377 case ResourceReduce: return "RES-REDUCE";
3378 case ResourceDemand: return "RES-DEMAND";
3379 case TopDepthReduce: return "TOP-DEPTH ";
3380 case TopPathReduce: return "TOP-PATH ";
3381 case BotHeightReduce:return "BOT-HEIGHT";
3382 case BotPathReduce: return "BOT-PATH ";
3383 case NodeOrder: return "ORDER ";
3384 case FirstValid: return "FIRST ";
3385 };
3386 // clang-format on
3387 llvm_unreachable("Unknown reason!");
3388}
3389
3390void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3391 PressureChange P;
3392 unsigned ResIdx = 0;
3393 unsigned Latency = 0;
3394 switch (Cand.Reason) {
3395 default:
3396 break;
3397 case RegExcess:
3398 P = Cand.RPDelta.Excess;
3399 break;
3400 case RegCritical:
3401 P = Cand.RPDelta.CriticalMax;
3402 break;
3403 case RegMax:
3404 P = Cand.RPDelta.CurrentMax;
3405 break;
3406 case ResourceReduce:
3407 ResIdx = Cand.Policy.ReduceResIdx;
3408 break;
3409 case ResourceDemand:
3410 ResIdx = Cand.Policy.DemandResIdx;
3411 break;
3412 case TopDepthReduce:
3413 Latency = Cand.SU->getDepth();
3414 break;
3415 case TopPathReduce:
3416 Latency = Cand.SU->getHeight();
3417 break;
3418 case BotHeightReduce:
3419 Latency = Cand.SU->getHeight();
3420 break;
3421 case BotPathReduce:
3422 Latency = Cand.SU->getDepth();
3423 break;
3424 }
3425 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3426 if (P.isValid())
3427 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3428 << ":" << P.getUnitInc() << " ";
3429 else
3430 dbgs() << " ";
3431 if (ResIdx)
3432 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3433 else
3434 dbgs() << " ";
3435 if (Latency)
3436 dbgs() << " " << Latency << " cycles ";
3437 else
3438 dbgs() << " ";
3439 dbgs() << '\n';
3440}
3441#endif
3442
3443/// Return true if this heuristic determines order.
3444/// TODO: Consider refactor return type of these functions as integer or enum,
3445/// as we may need to differentiate whether TryCand is better than Cand.
3446bool llvm::tryLess(int TryVal, int CandVal,
3447 GenericSchedulerBase::SchedCandidate &TryCand,
3448 GenericSchedulerBase::SchedCandidate &Cand,
3449 GenericSchedulerBase::CandReason Reason) {
3450 if (TryVal < CandVal) {
3451 TryCand.Reason = Reason;
3452 return true;
3453 }
3454 if (TryVal > CandVal) {
3455 if (Cand.Reason > Reason)
3456 Cand.Reason = Reason;
3457 return true;
3458 }
3459 return false;
3460}
3461
3462bool llvm::tryGreater(int TryVal, int CandVal,
3463 GenericSchedulerBase::SchedCandidate &TryCand,
3464 GenericSchedulerBase::SchedCandidate &Cand,
3465 GenericSchedulerBase::CandReason Reason) {
3466 if (TryVal > CandVal) {
3467 TryCand.Reason = Reason;
3468 return true;
3469 }
3470 if (TryVal < CandVal) {
3471 if (Cand.Reason > Reason)
3472 Cand.Reason = Reason;
3473 return true;
3474 }
3475 return false;
3476}
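// Minimal usage sketch (hypothetical values): tryGreater(3, 1, TryCand, Cand,
// Cluster) sets TryCand.Reason = Cluster and returns true, so TryCand wins;
// tryGreater(1, 3, ...) keeps Cand (possibly lowering Cand.Reason to Cluster)
// and still returns true because the heuristic decided the order; equal
// values return false and the next heuristic in the chain is consulted.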
3477
3478bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3479 GenericSchedulerBase::SchedCandidate &Cand,
3480 SchedBoundary &Zone) {
3481 if (Zone.isTop()) {
3482 // Prefer the candidate with the lesser depth, but only if one of them has
3483 // depth greater than the total latency scheduled so far, otherwise either
3484 // of them could be scheduled now with no stall.
3485 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3486 Zone.getScheduledLatency()) {
3487 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3488 TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3489 return true;
3490 }
3491 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3492 TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3493 return true;
3494 } else {
3495 // Prefer the candidate with the lesser height, but only if one of them has
3496 // height greater than the total latency scheduled so far, otherwise either
3497 // of them could be scheduled now with no stall.
3498 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3499 Zone.getScheduledLatency()) {
3500 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3501 TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3502 return true;
3503 }
3504 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3505 TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3506 return true;
3507 }
3508 return false;
3509}
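// Worked example (hypothetical, top zone): TryCand.SU depth 5, Cand.SU depth
// 8, Zone.getScheduledLatency() == 6. Since max(5, 8) > 6 a stall is
// possible, so the lower-depth TryCand wins with reason TopDepthReduce;
// otherwise the comparison falls through to the heights (TopPathReduce).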
3510
3511static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
3512 bool IsPostRA = false) {
3513 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3514 << GenericSchedulerBase::getReasonStr(Reason) << " ["
3515 << (IsPostRA ? "post-RA" : "pre-RA") << "]\n");
3516
3517 if (IsPostRA) {
3518 if (IsTop)
3519 NumTopPostRA++;
3520 else
3521 NumBotPostRA++;
3522
3523 switch (Reason) {
3524 case GenericSchedulerBase::NoCand:
3525 NumNoCandPostRA++;
3526 return;
3527 case GenericSchedulerBase::Only1:
3528 NumOnly1PostRA++;
3529 return;
3530 case GenericSchedulerBase::PhysReg:
3531 NumPhysRegPostRA++;
3532 return;
3533 case GenericSchedulerBase::RegExcess:
3534 NumRegExcessPostRA++;
3535 return;
3536 case GenericSchedulerBase::RegCritical:
3537 NumRegCriticalPostRA++;
3538 return;
3539 case GenericSchedulerBase::Stall:
3540 NumStallPostRA++;
3541 return;
3542 case GenericSchedulerBase::Cluster:
3543 NumClusterPostRA++;
3544 return;
3545 case GenericSchedulerBase::Weak:
3546 NumWeakPostRA++;
3547 return;
3548 case GenericSchedulerBase::RegMax:
3549 NumRegMaxPostRA++;
3550 return;
3551 case GenericSchedulerBase::ResourceReduce:
3552 NumResourceReducePostRA++;
3553 return;
3554 case GenericSchedulerBase::ResourceDemand:
3555 NumResourceDemandPostRA++;
3556 return;
3557 case GenericSchedulerBase::TopDepthReduce:
3558 NumTopDepthReducePostRA++;
3559 return;
3560 case GenericSchedulerBase::TopPathReduce:
3561 NumTopPathReducePostRA++;
3562 return;
3563 case GenericSchedulerBase::BotHeightReduce:
3564 NumBotHeightReducePostRA++;
3565 return;
3566 case GenericSchedulerBase::BotPathReduce:
3567 NumBotPathReducePostRA++;
3568 return;
3569 case GenericSchedulerBase::NodeOrder:
3570 NumNodeOrderPostRA++;
3571 return;
3572 case GenericSchedulerBase::FirstValid:
3573 NumFirstValidPostRA++;
3574 return;
3575 };
3576 } else {
3577 if (IsTop)
3578 NumTopPreRA++;
3579 else
3580 NumBotPreRA++;
3581
3582 switch (Reason) {
3583 case GenericSchedulerBase::NoCand:
3584 NumNoCandPreRA++;
3585 return;
3586 case GenericSchedulerBase::Only1:
3587 NumOnly1PreRA++;
3588 return;
3589 case GenericSchedulerBase::PhysReg:
3590 NumPhysRegPreRA++;
3591 return;
3592 case GenericSchedulerBase::RegExcess:
3593 NumRegExcessPreRA++;
3594 return;
3595 case GenericSchedulerBase::RegCritical:
3596 NumRegCriticalPreRA++;
3597 return;
3598 case GenericSchedulerBase::Stall:
3599 NumStallPreRA++;
3600 return;
3601 case GenericSchedulerBase::Cluster:
3602 NumClusterPreRA++;
3603 return;
3604 case GenericSchedulerBase::Weak:
3605 NumWeakPreRA++;
3606 return;
3607 case GenericSchedulerBase::RegMax:
3608 NumRegMaxPreRA++;
3609 return;
3610 case GenericSchedulerBase::ResourceReduce:
3611 NumResourceReducePreRA++;
3612 return;
3613 case GenericSchedulerBase::ResourceDemand:
3614 NumResourceDemandPreRA++;
3615 return;
3616 case GenericSchedulerBase::TopDepthReduce:
3617 NumTopDepthReducePreRA++;
3618 return;
3619 case GenericSchedulerBase::TopPathReduce:
3620 NumTopPathReducePreRA++;
3621 return;
3622 case GenericSchedulerBase::BotHeightReduce:
3623 NumBotHeightReducePreRA++;
3624 return;
3625 case GenericSchedulerBase::BotPathReduce:
3626 NumBotPathReducePreRA++;
3627 return;
3628 case GenericSchedulerBase::NodeOrder:
3629 NumNodeOrderPreRA++;
3630 return;
3631 case GenericSchedulerBase::FirstValid:
3632 NumFirstValidPreRA++;
3633 return;
3634 };
3635 }
3636 llvm_unreachable("Unknown reason!");
3637}
3638
3639static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
3640 bool IsPostRA = false) {
3641 tracePick(Cand.Reason, Cand.AtTop, IsPostRA);
3642}
3643
3644void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3645 assert(dag->hasVRegLiveness() &&
3646 "(PreRA)GenericScheduler needs vreg liveness");
3647 DAG = static_cast<ScheduleDAGMILive*>(dag);
3648 SchedModel = DAG->getSchedModel();
3649 TRI = DAG->TRI;
3650
3651 if (RegionPolicy.ComputeDFSResult)
3652 DAG->computeDFSResult();
3653
3654 Rem.init(DAG, SchedModel);
3655 Top.init(DAG, SchedModel, &Rem);
3656 Bot.init(DAG, SchedModel, &Rem);
3657
3658 // Initialize resource counts.
3659
3660 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3661 // are disabled, then these HazardRecs will be disabled.
3662 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3663 if (!Top.HazardRec) {
3664 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3665 }
3666 if (!Bot.HazardRec) {
3667 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3668 }
3669 TopCand.SU = nullptr;
3670 BotCand.SU = nullptr;
3671
3672 TopClusterID = InvalidClusterId;
3673 BotClusterID = InvalidClusterId;
3674}
3675
3676/// Initialize the per-region scheduling policy.
3677void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3678 MachineBasicBlock::iterator End,
3679 unsigned NumRegionInstrs) {
3680 const MachineFunction &MF = *Begin->getMF();
3681 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3682
3683 // Avoid setting up the register pressure tracker for small regions to save
3684 // compile time. As a rough heuristic, only track pressure when the number of
3685 // schedulable instructions exceeds half the allocatable integer register file
3686 // that is the largest legal integer register type.
3687 RegionPolicy.ShouldTrackPressure = true;
3688 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3689 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3690 if (TLI->isTypeLegal(LegalIntVT)) {
3691 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3692 TLI->getRegClassFor(LegalIntVT));
3693 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3694 break;
3695 }
3696 }
3697
3698 // For generic targets, we default to bottom-up, because it's simpler and more
3699 // compile-time optimizations have been implemented in that direction.
3700 RegionPolicy.OnlyBottomUp = true;
3701
3702 // Allow the subtarget to override default policy.
3703 SchedRegion Region(Begin, End, NumRegionInstrs);
3704 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Region);
3705
3706 // After subtarget overrides, apply command line options.
3707 if (!EnableRegPressure) {
3708 RegionPolicy.ShouldTrackPressure = false;
3709 RegionPolicy.ShouldTrackLaneMasks = false;
3710 }
3711
3712 if (PreRADirection == MISched::TopDown) {
3713 RegionPolicy.OnlyTopDown = true;
3714 RegionPolicy.OnlyBottomUp = false;
3715 } else if (PreRADirection == MISched::BottomUp) {
3716 RegionPolicy.OnlyTopDown = false;
3717 RegionPolicy.OnlyBottomUp = true;
3718 } else if (PreRADirection == MISched::Bidirectional) {
3719 RegionPolicy.OnlyBottomUp = false;
3720 RegionPolicy.OnlyTopDown = false;
3721 }
3722
3723 BotIdx = NumRegionInstrs - 1;
3724 this->NumRegionInstrs = NumRegionInstrs;
3725}
3726
3727void GenericScheduler::dumpPolicy() const {
3728 // Cannot completely remove virtual function even in release mode.
3729#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3730 dbgs() << "GenericScheduler RegionPolicy: "
3731 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3732 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3733 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3734 << "\n";
3735#endif
3736}
3737
3738/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3739/// critical path by more cycles than it takes to drain the instruction buffer.
3740/// We estimate an upper bound on in-flight instructions as:
3741///
3742/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3743/// InFlightIterations = AcyclicPath / CyclesPerIteration
3744/// InFlightResources = InFlightIterations * LoopResources
3745///
3746/// TODO: Check execution resources in addition to IssueCount.
3747void GenericScheduler::checkAcyclicLatency() {
3748 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3749 return;
3750
3751 // Scaled number of cycles per loop iteration.
3752 unsigned IterCount =
3753 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3754 Rem.RemIssueCount);
3755 // Scaled acyclic critical path.
3756 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3757 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3758 unsigned InFlightCount =
3759 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3760 unsigned BufferLimit =
3761 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3762
3763 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3764
3765 LLVM_DEBUG(
3766 dbgs() << "IssueCycles="
3767 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3768 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3769 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3770 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3771 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3772 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3773}
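// Worked example (hypothetical, assuming LatencyFactor == 1 and
// MicroOpFactor == 1): CyclicCritPath = 4, CriticalPath = 40,
// RemIssueCount = 8, MicroOpBufferSize = 16.
//   IterCount     = max(4 * 1, 8)     = 8
//   AcyclicCount  = 40 * 1            = 40
//   InFlightCount = (40 * 8 + 7) / 8  = 40
//   BufferLimit   = 16 * 1            = 16
// InFlightCount > BufferLimit, so IsAcyclicLatencyLimited is set.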
3774
3775void GenericScheduler::registerRoots() {
3776 Rem.CriticalPath = DAG->ExitSU.getDepth();
3777
3778 // Some roots may not feed into ExitSU. Check all of them in case.
3779 for (const SUnit *SU : Bot.Available) {
3780 if (SU->getDepth() > Rem.CriticalPath)
3781 Rem.CriticalPath = SU->getDepth();
3782 }
3783 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3784 if (DumpCriticalPathLength) {
3785 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3786 }
3787
3788 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3789 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3790 checkAcyclicLatency();
3791 }
3792}
3793
3794bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP,
3795 GenericSchedulerBase::SchedCandidate &TryCand,
3796 GenericSchedulerBase::SchedCandidate &Cand,
3797 GenericSchedulerBase::CandReason Reason,
3798 const TargetRegisterInfo *TRI,
3799 const MachineFunction &MF) {
3800 // If one candidate decreases and the other increases, go with it.
3801 // Invalid candidates have UnitInc==0.
3802 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3803 Reason)) {
3804 return true;
3805 }
3806 // Do not compare the magnitude of pressure changes between top and bottom
3807 // boundary.
3808 if (Cand.AtTop != TryCand.AtTop)
3809 return false;
3810
3811 // If both candidates affect the same set in the same boundary, go with the
3812 // smallest increase.
3813 unsigned TryPSet = TryP.getPSetOrMax();
3814 unsigned CandPSet = CandP.getPSetOrMax();
3815 if (TryPSet == CandPSet) {
3816 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3817 Reason);
3818 }
3819
3820 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3821 std::numeric_limits<int>::max();
3822
3823 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3824 std::numeric_limits<int>::max();
3825
3826 // If the candidates are decreasing pressure, reverse priority.
3827 if (TryP.getUnitInc() < 0)
3828 std::swap(TryRank, CandRank);
3829 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3830}
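// Minimal usage sketch (hypothetical deltas): if TryP increases the GPR set
// by +1 while CandP decreases it by -1, the sign comparison alone decides the
// order; if both candidates increase the same pressure set, the smaller
// increase wins; only when they touch different sets are the target-defined
// pressure-set scores compared.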
3831
3832unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) {
3833 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3834}
3835
3836/// Minimize physical register live ranges. Regalloc wants them adjacent to
3837/// their physreg def/use.
3838///
3839/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3840/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3841/// with the operation that produces or consumes the physreg. We'll do this when
3842/// regalloc has support for parallel copies.
3843int llvm::biasPhysReg(const SUnit *SU, bool isTop) {
3844 const MachineInstr *MI = SU->getInstr();
3845
3846 if (MI->isCopy()) {
3847 unsigned ScheduledOper = isTop ? 1 : 0;
3848 unsigned UnscheduledOper = isTop ? 0 : 1;
3849 // If we have already scheduled the physreg producer/consumer, immediately
3850 // schedule the copy.
3851 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3852 return 1;
3853 // If the physreg is at the boundary, defer it. Otherwise schedule it
3854 // immediately to free the dependent. We can hoist the copy later.
3855 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3856 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3857 return AtBoundary ? -1 : 1;
3858 }
3859
3860 if (MI->isMoveImmediate()) {
3861 // If we have a move immediate and all successors have been assigned, bias
3862 // towards scheduling this later. Make sure all register defs are to
3863 // physical registers.
3864 bool DoBias = true;
3865 for (const MachineOperand &Op : MI->defs()) {
3866 if (Op.isReg() && !Op.getReg().isPhysical()) {
3867 DoBias = false;
3868 break;
3869 }
3870 }
3871
3872 if (DoBias)
3873 return isTop ? -1 : 1;
3874 }
3875
3876 return 0;
3877}
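// Worked example (hypothetical, bottom-up): for "%v = COPY $physreg" whose
// physreg producer has not been scheduled yet (NumPredsLeft > 0, so the copy
// is not at the boundary), the unscheduled operand is physical and
// biasPhysReg returns 1, biasing the scheduler to pick the copy now and keep
// the physreg live range short.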
3878
3879void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3880 bool AtTop,
3881 const RegPressureTracker &RPTracker,
3882 RegPressureTracker &TempTracker) {
3883 Cand.SU = SU;
3884 Cand.AtTop = AtTop;
3885 if (DAG->isTrackingPressure()) {
3886 if (AtTop) {
3887 TempTracker.getMaxDownwardPressureDelta(
3888 Cand.SU->getInstr(),
3889 Cand.RPDelta,
3890 DAG->getRegionCriticalPSets(),
3891 DAG->getRegPressure().MaxSetPressure);
3892 } else {
3893 if (VerifyScheduling) {
3894 TempTracker.getMaxUpwardPressureDelta(
3895 Cand.SU->getInstr(),
3896 &DAG->getPressureDiff(Cand.SU),
3897 Cand.RPDelta,
3898 DAG->getRegionCriticalPSets(),
3899 DAG->getRegPressure().MaxSetPressure);
3900 } else {
3901 RPTracker.getUpwardPressureDelta(
3902 Cand.SU->getInstr(),
3903 DAG->getPressureDiff(Cand.SU),
3904 Cand.RPDelta,
3905 DAG->getRegionCriticalPSets(),
3906 DAG->getRegPressure().MaxSetPressure);
3907 }
3908 }
3909 }
3910 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3911 << " Try SU(" << Cand.SU->NodeNum << ") "
3912 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3913 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3914}
3915
3916/// Apply a set of heuristics to a new candidate. Heuristics are currently
3917/// hierarchical. This may be more efficient than a graduated cost model because
3918/// we don't need to evaluate all aspects of the model for each node in the
3919/// queue. But it's really done to make the heuristics easier to debug and
3920/// statistically analyze.
3921///
3922/// \param Cand provides the policy and current best candidate.
3923/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3924/// \param Zone describes the scheduled zone that we are extending, or nullptr
3925/// if Cand is from a different zone than TryCand.
3926/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3927bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3928 SchedCandidate &TryCand,
3929 SchedBoundary *Zone) const {
3930 // Initialize the candidate if needed.
3931 if (!Cand.isValid()) {
3932 TryCand.Reason = FirstValid;
3933 return true;
3934 }
3935
3936 // Bias physreg defs and copies toward their uses and definitions, respectively.
3937 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3938 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3939 return TryCand.Reason != NoCand;
3940
3941 // Avoid exceeding the target's limit.
3942 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3943 Cand.RPDelta.Excess,
3944 TryCand, Cand, RegExcess, TRI,
3945 DAG->MF))
3946 return TryCand.Reason != NoCand;
3947
3948 // Avoid increasing the max critical pressure in the scheduled region.
3949 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3950 Cand.RPDelta.CriticalMax,
3951 TryCand, Cand, RegCritical, TRI,
3952 DAG->MF))
3953 return TryCand.Reason != NoCand;
3954
3955 // We only compare a subset of features when comparing nodes between
3956 // Top and Bottom boundary. Some properties are simply incomparable, in many
3957 // other instances we should only override the other boundary if something
3958 // is a clear good pick on one boundary. Skip heuristics that are more
3959 // "tie-breaking" in nature.
3960 bool SameBoundary = Zone != nullptr;
3961 if (SameBoundary) {
3962 // For loops that are acyclic path limited, aggressively schedule for
3963 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3964 // heuristics to take precedence.
3965 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3966 tryLatency(TryCand, Cand, *Zone))
3967 return TryCand.Reason != NoCand;
3968
3969 // Prioritize instructions that read unbuffered resources by stall cycles.
3970 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3971 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3972 return TryCand.Reason != NoCand;
3973 }
3974
3975 // Keep clustered nodes together to encourage downstream peephole
3976 // optimizations which may reduce resource requirements.
3977 //
3978 // This is a best effort to set things up for a post-RA pass. Optimizations
3979 // like generating loads of multiple registers should ideally be done within
3980 // the scheduler pass by combining the loads during DAG postprocessing.
3981 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
3982 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
3983 bool CandIsClusterSucc =
3984 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
3985 bool TryCandIsClusterSucc =
3986 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
3987
3988 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
3989 Cluster))
3990 return TryCand.Reason != NoCand;
3991
3992 if (SameBoundary) {
3993 // Weak edges are for clustering and other constraints.
3994 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3995 getWeakLeft(Cand.SU, Cand.AtTop),
3996 TryCand, Cand, Weak))
3997 return TryCand.Reason != NoCand;
3998 }
3999
4000 // Avoid increasing the max pressure of the entire region.
4001 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
4002 Cand.RPDelta.CurrentMax,
4003 TryCand, Cand, RegMax, TRI,
4004 DAG->MF))
4005 return TryCand.Reason != NoCand;
4006
4007 if (SameBoundary) {
4008 // Avoid critical resource consumption and balance the schedule.
4009 TryCand.initResourceDelta(DAG, SchedModel);
4010 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
4011 TryCand, Cand, ResourceReduce))
4012 return TryCand.Reason != NoCand;
4013 if (tryGreater(TryCand.ResDelta.DemandedResources,
4014 Cand.ResDelta.DemandedResources,
4015 TryCand, Cand, ResourceDemand))
4016 return TryCand.Reason != NoCand;
4017
4018 // Avoid serializing long latency dependence chains.
4019 // For acyclic path limited loops, latency was already checked above.
4020 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
4021 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
4022 return TryCand.Reason != NoCand;
4023
4024 // Fall through to original instruction order.
4025 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
4026 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
4027 TryCand.Reason = NodeOrder;
4028 return true;
4029 }
4030 }
4031
4032 return false;
4033}
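// Descriptive summary of the cascade above (no additional logic): first valid
// candidate, physreg copy bias, excess-pressure relief, critical-pressure
// relief, then within one boundary acyclic-latency and stall checks,
// clustering, weak edges, whole-region max-pressure relief, resource
// reduction/demand, the latency heuristic, and finally source node order as
// the tie breaker.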
4034
4035/// Pick the best candidate from the queue.
4036///
4037/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
4038/// DAG building. To adjust for the current scheduling location we need to
4039/// maintain the number of vreg uses remaining to be top-scheduled.
4040void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
4041 const CandPolicy &ZonePolicy,
4042 const RegPressureTracker &RPTracker,
4043 SchedCandidate &Cand) {
4044 // getMaxPressureDelta temporarily modifies the tracker.
4045 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
4046
4047 ReadyQueue &Q = Zone.Available;
4048 for (SUnit *SU : Q) {
4049
4050 SchedCandidate TryCand(ZonePolicy);
4051 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
4052 // Pass SchedBoundary only when comparing nodes from the same boundary.
4053 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
4054 if (tryCandidate(Cand, TryCand, ZoneArg)) {
4055 // Initialize resource delta if needed in case future heuristics query it.
4056 if (TryCand.ResDelta == SchedResourceDelta())
4057 TryCand.initResourceDelta(DAG, SchedModel);
4058 Cand.setBest(TryCand);
4059 LLVM_DEBUG(traceCandidate(Cand));
4060 }
4061 }
4062}
4063
4064/// Pick the best candidate node from either the top or bottom queue.
4065SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4066 // Schedule as far as possible in the direction of no choice. This is most
4067 // efficient, but also provides the best heuristics for CriticalPSets.
4068 if (SUnit *SU = Bot.pickOnlyChoice()) {
4069 IsTopNode = false;
4070 tracePick(Only1, /*IsTopNode=*/false);
4071 return SU;
4072 }
4073 if (SUnit *SU = Top.pickOnlyChoice()) {
4074 IsTopNode = true;
4075 tracePick(Only1, /*IsTopNode=*/true);
4076 return SU;
4077 }
4078 // Set the bottom-up policy based on the state of the current bottom zone and
4079 // the instructions outside the zone, including the top zone.
4080 CandPolicy BotPolicy;
4081 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
4082 // Set the top-down policy based on the state of the current top zone and
4083 // the instructions outside the zone, including the bottom zone.
4084 CandPolicy TopPolicy;
4085 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
4086
4087 // See if BotCand is still valid (because we previously scheduled from Top).
4088 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4089 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4090 BotCand.Policy != BotPolicy) {
4091 BotCand.reset(CandPolicy());
4092 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
4093 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4094 } else {
4096#ifndef NDEBUG
4097 if (VerifyScheduling) {
4098 SchedCandidate TCand;
4099 TCand.reset(CandPolicy());
4100 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
4101 assert(TCand.SU == BotCand.SU &&
4102 "Last pick result should correspond to re-picking right now");
4103 }
4104#endif
4105 }
4106
4107 // Check if the top Q has a better candidate.
4108 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4109 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4110 TopCand.Policy != TopPolicy) {
4111 TopCand.reset(CandPolicy());
4112 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
4113 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4114 } else {
4116#ifndef NDEBUG
4117 if (VerifyScheduling) {
4118 SchedCandidate TCand;
4119 TCand.reset(CandPolicy());
4120 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
4121 assert(TCand.SU == TopCand.SU &&
4122 "Last pick result should correspond to re-picking right now");
4123 }
4124#endif
4125 }
4126
4127 // Pick best from BotCand and TopCand.
4128 assert(BotCand.isValid());
4129 assert(TopCand.isValid());
4130 SchedCandidate Cand = BotCand;
4131 TopCand.Reason = NoCand;
4132 if (tryCandidate(Cand, TopCand, nullptr)) {
4133 Cand.setBest(TopCand);
4135 }
4136
4137 IsTopNode = Cand.AtTop;
4138 tracePick(Cand);
4139 return Cand.SU;
4140}
4141
4142/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
4143SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
4144 if (DAG->top() == DAG->bottom()) {
4145 assert(Top.Available.empty() && Top.Pending.empty() &&
4146 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4147 return nullptr;
4148 }
4149 SUnit *SU;
4150 if (RegionPolicy.OnlyTopDown) {
4151 SU = Top.pickOnlyChoice();
4152 if (!SU) {
4153 CandPolicy NoPolicy;
4154 TopCand.reset(NoPolicy);
4155 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
4156 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4158 SU = TopCand.SU;
4159 }
4160 IsTopNode = true;
4161 } else if (RegionPolicy.OnlyBottomUp) {
4162 SU = Bot.pickOnlyChoice();
4163 if (!SU) {
4164 CandPolicy NoPolicy;
4165 BotCand.reset(NoPolicy);
4166 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
4167 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4169 SU = BotCand.SU;
4170 }
4171 IsTopNode = false;
4172 } else {
4173 SU = pickNodeBidirectional(IsTopNode);
4174 }
4175 assert(!SU->isScheduled && "SUnit scheduled twice.");
4176
4177 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
4178 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
4179 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
4180 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
4181 //
4182 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
4183 // isBottomReady. That is, it didn't factor into the decision to choose SU
4184 // because it isTopReady or isBottomReady, respectively. In fact, if the
4185 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
4186 // queues respectively contain the original roots and don't get updated when
4187 // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
4188 // because we scheduled everything but the top roots. Conversely, if SU
4189 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
4190 // but the bottom roots. If it's in a queue even coincidentally, it should be
4191 // removed so it does not get re-picked in a subsequent pickNode call.
4192 if (SU->isTopReady())
4193 Top.removeReady(SU);
4194 if (SU->isBottomReady())
4195 Bot.removeReady(SU);
4196
4197 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4198 << *SU->getInstr());
4199
4200 if (IsTopNode) {
4201 if (SU->NodeNum == TopIdx++)
4202 ++NumInstrsInSourceOrderPreRA;
4203 } else {
4204 assert(BotIdx < NumRegionInstrs && "out of bounds");
4205 if (SU->NodeNum == BotIdx--)
4206 ++NumInstrsInSourceOrderPreRA;
4207 }
4208
4209 NumInstrsScheduledPreRA += 1;
4210
4211 return SU;
4212}
4213
4214void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
4215 MachineBasicBlock::iterator InsertPos = SU->getInstr();
4216 if (!isTop)
4217 ++InsertPos;
4218 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
4219
4220 // Find already scheduled copies with a single physreg dependence and move
4221 // them just above the scheduled instruction.
4222 for (SDep &Dep : Deps) {
4223 if (Dep.getKind() != SDep::Data || !Dep.getReg().isPhysical())
4224 continue;
4225 SUnit *DepSU = Dep.getSUnit();
4226 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
4227 continue;
4228 MachineInstr *Copy = DepSU->getInstr();
4229 if (!Copy->isCopy() && !Copy->isMoveImmediate())
4230 continue;
4231 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
4232 DAG->dumpNode(*Dep.getSUnit()));
4233 DAG->moveInstruction(Copy, InsertPos);
4234 }
4235}
4236
4237/// Update the scheduler's state after scheduling a node. This is the same node
4238/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
4239/// update it's state based on the current cycle before MachineSchedStrategy
4240/// does.
4241///
4242/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
4243/// them here. See comments in biasPhysReg.
4244void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4245 if (IsTopNode) {
4246 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4248 LLVM_DEBUG({
4250 ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
4251 dbgs() << " Top Cluster: ";
4252 for (auto *N : *TopCluster)
4253 dbgs() << N->NodeNum << '\t';
4254 dbgs() << '\n';
4255 }
4256 });
4257 Top.bumpNode(SU);
4258 if (SU->hasPhysRegUses)
4259 reschedulePhysReg(SU, true);
4260 } else {
4261 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4263 LLVM_DEBUG({
4265 ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
4266 dbgs() << " Bot Cluster: ";
4267 for (auto *N : *BotCluster)
4268 dbgs() << N->NodeNum << '\t';
4269 dbgs() << '\n';
4270 }
4271 });
4272 Bot.bumpNode(SU);
4273 if (SU->hasPhysRegDefs)
4274 reschedulePhysReg(SU, false);
4275 }
4276}
4277
4281
4282static MachineSchedRegistry
4283GenericSchedRegistry("converge", "Standard converging scheduler.",
4284 createConvergingSched);
4285
4286//===----------------------------------------------------------------------===//
4287// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
4288//===----------------------------------------------------------------------===//
4289
4290void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
4291 DAG = Dag;
4292 SchedModel = DAG->getSchedModel();
4293 TRI = DAG->TRI;
4294
4295 Rem.init(DAG, SchedModel);
4296 Top.init(DAG, SchedModel, &Rem);
4297 Bot.init(DAG, SchedModel, &Rem);
4298
4299 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
4300 // or are disabled, then these HazardRecs will be disabled.
4301 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
4302 if (!Top.HazardRec) {
4303 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4304 }
4305 if (!Bot.HazardRec) {
4306 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4307 }
4310}
4311
4312void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
4313 MachineBasicBlock::iterator End,
4314 unsigned NumRegionInstrs) {
4315 const MachineFunction &MF = *Begin->getMF();
4316
4317 // Default to top-down because it was implemented first and existing targets
4318 // expect that behavior by default.
4319 RegionPolicy.OnlyTopDown = true;
4320 RegionPolicy.OnlyBottomUp = false;
4321
4322 // Allow the subtarget to override default policy.
4323 SchedRegion Region(Begin, End, NumRegionInstrs);
4324 MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, Region);
4325
4326 // After subtarget overrides, apply command line options.
4327 if (PostRADirection == MISched::TopDown) {
4328 RegionPolicy.OnlyTopDown = true;
4329 RegionPolicy.OnlyBottomUp = false;
4330 } else if (PostRADirection == MISched::BottomUp) {
4331 RegionPolicy.OnlyTopDown = false;
4332 RegionPolicy.OnlyBottomUp = true;
4333 } else if (PostRADirection == MISched::Bidirectional) {
4334 RegionPolicy.OnlyBottomUp = false;
4335 RegionPolicy.OnlyTopDown = false;
4336 }
4337
4338 BotIdx = NumRegionInstrs - 1;
4339 this->NumRegionInstrs = NumRegionInstrs;
4340}
4341
4342void PostGenericScheduler::registerRoots() {
4343 Rem.CriticalPath = DAG->ExitSU.getDepth();
4344
4345 // Some roots may not feed into ExitSU. Check all of them in case.
4346 for (const SUnit *SU : Bot.Available) {
4347 if (SU->getDepth() > Rem.CriticalPath)
4348 Rem.CriticalPath = SU->getDepth();
4349 }
4350 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
4351 if (DumpCriticalPathLength) {
4352 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
4353 }
4354}
4355
4356/// Apply a set of heuristics to a new candidate for PostRA scheduling.
4357///
4358/// \param Cand provides the policy and current best candidate.
4359/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
4360/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
4361bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
4362 SchedCandidate &TryCand) {
4363 // Initialize the candidate if needed.
4364 if (!Cand.isValid()) {
4365 TryCand.Reason = FirstValid;
4366 return true;
4367 }
4368
4369 // Prioritize instructions that read unbuffered resources by stall cycles.
4370 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
4371 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4372 return TryCand.Reason != NoCand;
4373
4374 // Keep clustered nodes together.
4375 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
4376 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
4377 bool CandIsClusterSucc =
4378 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
4379 bool TryCandIsClusterSucc =
4380 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
4381
4382 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
4383 Cluster))
4384 return TryCand.Reason != NoCand;
4385 // Avoid critical resource consumption and balance the schedule.
4386 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
4387 TryCand, Cand, ResourceReduce))
4388 return TryCand.Reason != NoCand;
4389 if (tryGreater(TryCand.ResDelta.DemandedResources,
4390 Cand.ResDelta.DemandedResources,
4391 TryCand, Cand, ResourceDemand))
4392 return TryCand.Reason != NoCand;
4393
4394 // We only compare a subset of features when comparing nodes between
4395 // Top and Bottom boundary.
4396 if (Cand.AtTop == TryCand.AtTop) {
4397 // Avoid serializing long latency dependence chains.
4398 if (Cand.Policy.ReduceLatency &&
4399 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4400 return TryCand.Reason != NoCand;
4401 }
4402
4403 // Fall through to original instruction order.
4404 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
4405 TryCand.Reason = NodeOrder;
4406 return true;
4407 }
4408
4409 return false;
4410}
4411
4412void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
4413 SchedCandidate &Cand) {
4414 ReadyQueue &Q = Zone.Available;
4415 for (SUnit *SU : Q) {
4416 SchedCandidate TryCand(Cand.Policy);
4417 TryCand.SU = SU;
4418 TryCand.AtTop = Zone.isTop();
4419 TryCand.initResourceDelta(DAG, SchedModel);
4420 if (tryCandidate(Cand, TryCand)) {
4421 Cand.setBest(TryCand);
4422 LLVM_DEBUG(traceCandidate(Cand));
4423 }
4424 }
4425}
4426
4427/// Pick the best candidate node from either the top or bottom queue.
4428SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4429 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4430 // out common parts.
4431
4432 // Schedule as far as possible in the direction of no choice. This is most
4433 // efficient, but also provides the best heuristics for CriticalPSets.
4434 if (SUnit *SU = Bot.pickOnlyChoice()) {
4435 IsTopNode = false;
4436 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4437 return SU;
4438 }
4439 if (SUnit *SU = Top.pickOnlyChoice()) {
4440 IsTopNode = true;
4441 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4442 return SU;
4443 }
4444 // Set the bottom-up policy based on the state of the current bottom zone and
4445 // the instructions outside the zone, including the top zone.
4446 CandPolicy BotPolicy;
4447 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4448 // Set the top-down policy based on the state of the current top zone and
4449 // the instructions outside the zone, including the bottom zone.
4450 CandPolicy TopPolicy;
4451 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4452
4453 // See if BotCand is still valid (because we previously scheduled from Top).
4454 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4455 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4456 BotCand.Policy != BotPolicy) {
4457 BotCand.reset(CandPolicy());
4459 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4460 } else {
4462#ifndef NDEBUG
4463 if (VerifyScheduling) {
4464 SchedCandidate TCand;
4465 TCand.reset(CandPolicy());
4467 assert(TCand.SU == BotCand.SU &&
4468 "Last pick result should correspond to re-picking right now");
4469 }
4470#endif
4471 }
4472
4473 // Check if the top Q has a better candidate.
4474 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4475 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4476 TopCand.Policy != TopPolicy) {
4477 TopCand.reset(CandPolicy());
4479 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4480 } else {
4482#ifndef NDEBUG
4483 if (VerifyScheduling) {
4484 SchedCandidate TCand;
4485 TCand.reset(CandPolicy());
4487 assert(TCand.SU == TopCand.SU &&
4488 "Last pick result should correspond to re-picking right now");
4489 }
4490#endif
4491 }
4492
4493 // Pick best from BotCand and TopCand.
4494 assert(BotCand.isValid());
4495 assert(TopCand.isValid());
4496 SchedCandidate Cand = BotCand;
4497 TopCand.Reason = NoCand;
4498 if (tryCandidate(Cand, TopCand)) {
4499 Cand.setBest(TopCand);
4501 }
4502
4503 IsTopNode = Cand.AtTop;
4504 tracePick(Cand, /*IsPostRA=*/true);
4505 return Cand.SU;
4506}
4507
4508/// Pick the next node to schedule.
4509SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4510 if (DAG->top() == DAG->bottom()) {
4511 assert(Top.Available.empty() && Top.Pending.empty() &&
4512 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4513 return nullptr;
4514 }
4515 SUnit *SU;
4516 if (RegionPolicy.OnlyBottomUp) {
4517 SU = Bot.pickOnlyChoice();
4518 if (SU) {
4519 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4520 } else {
4521 CandPolicy NoPolicy;
4522 BotCand.reset(NoPolicy);
4523 // Set the bottom-up policy based on the state of the current bottom
4524 // zone and the instructions outside the zone, including the top zone.
4525 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4527 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4528 tracePick(BotCand, /*IsPostRA=*/true);
4529 SU = BotCand.SU;
4530 }
4531 IsTopNode = false;
4532 } else if (RegionPolicy.OnlyTopDown) {
4533 SU = Top.pickOnlyChoice();
4534 if (SU) {
4535 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4536 } else {
4537 CandPolicy NoPolicy;
4538 TopCand.reset(NoPolicy);
4539 // Set the top-down policy based on the state of the current top zone
4540 // and the instructions outside the zone, including the bottom zone.
4541 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4543 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4544 tracePick(TopCand, /*IsPostRA=*/true);
4545 SU = TopCand.SU;
4546 }
4547 IsTopNode = true;
4548 } else {
4549 SU = pickNodeBidirectional(IsTopNode);
4550 }
4551 assert(!SU->isScheduled && "SUnit scheduled twice.");
4552
4553 if (SU->isTopReady())
4554 Top.removeReady(SU);
4555 if (SU->isBottomReady())
4556 Bot.removeReady(SU);
4557
4558 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4559 << *SU->getInstr());
4560
4561 if (IsTopNode) {
4562 if (SU->NodeNum == TopIdx++)
4563 ++NumInstrsInSourceOrderPostRA;
4564 } else {
4565 assert(BotIdx < NumRegionInstrs && "out of bounds");
4566 if (SU->NodeNum == BotIdx--)
4567 ++NumInstrsInSourceOrderPostRA;
4568 }
4569
4570 NumInstrsScheduledPostRA += 1;
4571
4572 return SU;
4573}
4574
4575/// Called after ScheduleDAGMI has scheduled an instruction and updated
4576/// scheduled/remaining flags in the DAG nodes.
4577void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4578 if (IsTopNode) {
4579 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4581 Top.bumpNode(SU);
4582 } else {
4583 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4585 Bot.bumpNode(SU);
4586 }
4587}
4588
4589//===----------------------------------------------------------------------===//
4590// ILP Scheduler. Currently for experimental analysis of heuristics.
4591//===----------------------------------------------------------------------===//
4592
4593namespace {
4594
4595/// Order nodes by the ILP metric.
4596struct ILPOrder {
4597 const SchedDFSResult *DFSResult = nullptr;
4598 const BitVector *ScheduledTrees = nullptr;
4599 bool MaximizeILP;
4600
4601 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4602
4603 /// Apply a less-than relation on node priority.
4604 ///
4605 /// (Return true if A comes after B in the Q.)
4606 bool operator()(const SUnit *A, const SUnit *B) const {
4607 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4608 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4609 if (SchedTreeA != SchedTreeB) {
4610 // Unscheduled trees have lower priority.
4611 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4612 return ScheduledTrees->test(SchedTreeB);
4613
4614 // Trees with shallower connections have lower priority.
4615 if (DFSResult->getSubtreeLevel(SchedTreeA)
4616 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4617 return DFSResult->getSubtreeLevel(SchedTreeA)
4618 < DFSResult->getSubtreeLevel(SchedTreeB);
4619 }
4620 }
4621 if (MaximizeILP)
4622 return DFSResult->getILP(A) < DFSResult->getILP(B);
4623 else
4624 return DFSResult->getILP(A) > DFSResult->getILP(B);
4625 }
4626};
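// Illustrative comparison (hypothetical DAG): for SUnits A and B in different
// subtrees where only B's subtree has already started scheduling,
// operator()(A, B) returns true, i.e. A has lower priority, so the heap keeps
// draining B's subtree first; for nodes that tie on the subtree criteria, the
// higher ILP value wins when MaximizeILP is set, the lower one otherwise.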
4627
4628/// Schedule based on the ILP metric.
4629class ILPScheduler : public MachineSchedStrategy {
4630 ScheduleDAGMILive *DAG = nullptr;
4631 ILPOrder Cmp;
4632
4633 std::vector<SUnit*> ReadyQ;
4634
4635public:
4636 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4637
4638 void initialize(ScheduleDAGMI *dag) override {
4639 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4640 DAG = static_cast<ScheduleDAGMILive*>(dag);
4641 DAG->computeDFSResult();
4642 Cmp.DFSResult = DAG->getDFSResult();
4643 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4644 ReadyQ.clear();
4645 }
4646
4647 void registerRoots() override {
4648 // Restore the heap in ReadyQ with the updated DFS results.
4649 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4650 }
4651
4652 /// Implement MachineSchedStrategy interface.
4653 /// -----------------------------------------
4654
4655 /// Callback to select the highest priority node from the ready Q.
4656 SUnit *pickNode(bool &IsTopNode) override {
4657 if (ReadyQ.empty()) return nullptr;
4658 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4659 SUnit *SU = ReadyQ.back();
4660 ReadyQ.pop_back();
4661 IsTopNode = false;
4662 LLVM_DEBUG(dbgs() << "Pick node "
4663 << "SU(" << SU->NodeNum << ") "
4664 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4665 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4666 << " @"
4667 << DAG->getDFSResult()->getSubtreeLevel(
4668 DAG->getDFSResult()->getSubtreeID(SU))
4669 << '\n'
4670 << "Scheduling " << *SU->getInstr());
4671 return SU;
4672 }
4673
4674 /// Scheduler callback to notify that a new subtree is scheduled.
4675 void scheduleTree(unsigned SubtreeID) override {
4676 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4677 }
4678
4679 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4680 /// DFSResults, and resort the priority Q.
4681 void schedNode(SUnit *SU, bool IsTopNode) override {
4682 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4683 }
4684
4685 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4686
4687 void releaseBottomNode(SUnit *SU) override {
4688 ReadyQ.push_back(SU);
4689 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4690 }
4691};
4692
4693} // end anonymous namespace
4694
4695static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4696 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4697}
4698static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4699 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4700}
4701
4703 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4705 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4706
4707//===----------------------------------------------------------------------===//
4708// Machine Instruction Shuffler for Correctness Testing
4709//===----------------------------------------------------------------------===//
4710
4711#ifndef NDEBUG
4712namespace {
4713
4714/// Apply a less-than relation on the node order, which corresponds to the
4715/// instruction order prior to scheduling. IsReverse implements greater-than.
4716template<bool IsReverse>
4717struct SUnitOrder {
4718 bool operator()(SUnit *A, SUnit *B) const {
4719 if (IsReverse)
4720 return A->NodeNum > B->NodeNum;
4721 else
4722 return A->NodeNum < B->NodeNum;
4723 }
4724};
4725
4726/// Reorder instructions as much as possible.
4727class InstructionShuffler : public MachineSchedStrategy {
4728 bool IsAlternating;
4729 bool IsTopDown;
4730
4731 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4732 // gives nodes with a higher number higher priority causing the latest
4733 // instructions to be scheduled first.
4734 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4735 TopQ;
4736
4737 // When scheduling bottom-up, use greater-than as the queue priority.
4738 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4739 BottomQ;
4740
4741public:
4742 InstructionShuffler(bool alternate, bool topdown)
4743 : IsAlternating(alternate), IsTopDown(topdown) {}
4744
4745 void initialize(ScheduleDAGMI*) override {
4746 TopQ.clear();
4747 BottomQ.clear();
4748 }
4749
4750 /// Implement MachineSchedStrategy interface.
4751 /// -----------------------------------------
4752
4753 SUnit *pickNode(bool &IsTopNode) override {
4754 SUnit *SU;
4755 if (IsTopDown) {
4756 do {
4757 if (TopQ.empty()) return nullptr;
4758 SU = TopQ.top();
4759 TopQ.pop();
4760 } while (SU->isScheduled);
4761 IsTopNode = true;
4762 } else {
4763 do {
4764 if (BottomQ.empty()) return nullptr;
4765 SU = BottomQ.top();
4766 BottomQ.pop();
4767 } while (SU->isScheduled);
4768 IsTopNode = false;
4769 }
4770 if (IsAlternating)
4771 IsTopDown = !IsTopDown;
4772 return SU;
4773 }
4774
4775 void schedNode(SUnit *SU, bool IsTopNode) override {}
4776
4777 void releaseTopNode(SUnit *SU) override {
4778 TopQ.push(SU);
4779 }
4780 void releaseBottomNode(SUnit *SU) override {
4781 BottomQ.push(SU);
4782 }
4783};
4784
4785} // end anonymous namespace
4786
4787static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4788 bool Alternate =
4789 PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp;
4790 bool TopDown = PreRADirection != MISched::BottomUp;
4791 return new ScheduleDAGMILive(
4792 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4793}
4794
4796 "shuffle", "Shuffle machine instructions alternating directions",
4798#endif // !NDEBUG
4799
4800//===----------------------------------------------------------------------===//
4801// GraphWriter support for ScheduleDAGMILive.
4802//===----------------------------------------------------------------------===//
4803
4804#ifndef NDEBUG
4805
4806template <>
4809
4810template <>
4813
4814 static std::string getGraphName(const ScheduleDAG *G) {
4815 return std::string(G->MF.getName());
4816 }
4817
4818 static bool renderGraphFromBottomUp() {
4819 return true;
4820 }
4821
4822 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4823 if (ViewMISchedCutoff == 0)
4824 return false;
4825 return (Node->Preds.size() > ViewMISchedCutoff
4826 || Node->Succs.size() > ViewMISchedCutoff);
4827 }
4828
4829 /// If you want to override the dot attributes printed for a particular
4830 /// edge, override this method.
4831 static std::string getEdgeAttributes(const SUnit *Node,
4832 SUnitIterator EI,
4833 const ScheduleDAG *Graph) {
4834 if (EI.isArtificialDep())
4835 return "color=cyan,style=dashed";
4836 if (EI.isCtrlDep())
4837 return "color=blue,style=dashed";
4838 return "";
4839 }
4840
4841 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4842 std::string Str;
4843 raw_string_ostream SS(Str);
4844 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4845 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4846 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4847 SS << "SU:" << SU->NodeNum;
4848 if (DFS)
4849 SS << " I:" << DFS->getNumInstrs(SU);
4850 return Str;
4851 }
4852
4853 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4854 return G->getGraphNodeLabel(SU);
4855 }
4856
4857 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4858 std::string Str("shape=Mrecord");
4859 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4860 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4861 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4862 if (DFS) {
4863 Str += ",style=filled,fillcolor=\"#";
4864 Str += DOT::getColorString(DFS->getSubtreeID(N));
4865 Str += '"';
4866 }
4867 return Str;
4868 }
4869};
4870
4871#endif // NDEBUG
4872
4873/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4874/// rendered using 'dot'.
4875void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4876#ifndef NDEBUG
4877 ViewGraph(this, Name, false, Title);
4878#else
4879 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4880 << "systems with Graphviz or gv!\n";
4881#endif // NDEBUG
4882}
4883
4884/// Out-of-line implementation with no arguments is handy for gdb.
4885void ScheduleDAGMI::viewGraph() {
4886 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4887}
4888
4889/// Sort predicate for the intervals stored in an instance of
4890/// ResourceSegments. Intervals are always disjoint (no intersection
4891/// for any pairs of intervals), therefore we can sort the totality of
4892/// the intervals by looking only at the left boundary.
4893static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4894 const ResourceSegments::IntervalTy &B) {
4895 return A.first < B.first;
4896}
4897
4898unsigned ResourceSegments::getFirstAvailableAt(
4899 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4900 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4901 IntervalBuilder) const {
4902 assert(llvm::is_sorted(_Intervals, sortIntervals) &&
4903 "Cannot execute on an un-sorted set of intervals.");
4904
4905 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4906 // a ResourceSegment interval for that situation.
4907 if (AcquireAtCycle == ReleaseAtCycle)
4908 return CurrCycle;
4909
4910 unsigned RetCycle = CurrCycle;
4911 ResourceSegments::IntervalTy NewInterval =
4912 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4913 for (auto &Interval : _Intervals) {
4914 if (!intersects(NewInterval, Interval))
4915 continue;
4916
4917 // Move the interval right next to the top of the one it
4918 // intersects.
4919 assert(Interval.second > NewInterval.first &&
4920 "Invalid intervals configuration.");
4921 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4922 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4923 }
4924 return RetCycle;
4925}
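// Worked example (hypothetical booking, assuming a top-down interval builder
// that books [CurrCycle, CurrCycle + 2)): with existing intervals [2, 5) and
// [7, 9), a request starting at CurrCycle = 4 builds [4, 6), which collides
// with [2, 5) and is pushed to [5, 7); that slot is free, so
// getFirstAvailableAt returns 5.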
4926
4927void ResourceSegments::add(ResourceSegments::IntervalTy A,
4928 const unsigned CutOff) {
4929 assert(A.first <= A.second && "Cannot add negative resource usage");
4930 assert(CutOff > 0 && "0-size interval history has no use.");
4931 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4932 // instruction needed the resource to be available but does not use it.
4933 // However, ResourceSegment represents an interval that is closed on the left
4934 // and open on the right. It is impossible to represent an empty interval when
4935 // the left is closed. Do not add it to Intervals.
4936 if (A.first == A.second)
4937 return;
4938
4939 assert(all_of(_Intervals,
4940 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4941 return !intersects(A, Interval);
4942 }) &&
4943 "A resource is being overwritten");
4944 _Intervals.push_back(A);
4945
4946 sortAndMerge();
4947
4948 // Do not keep the full history of the intervals, just the
4949 // latest #CutOff.
4950 while (_Intervals.size() > CutOff)
4951 _Intervals.pop_front();
4952}
4953
4954bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4955 ResourceSegments::IntervalTy B) {
4956 assert(A.first <= A.second && "Invalid interval");
4957 assert(B.first <= B.second && "Invalid interval");
4958
4959 // Share one boundary.
4960 if ((A.first == B.first) || (A.second == B.second))
4961 return true;
4962
4963 // full intersect: [ *** ) B
4964 // [***) A
4965 if ((A.first > B.first) && (A.second < B.second))
4966 return true;
4967
4968 // right intersect: [ ***) B
4969 // [*** ) A
4970 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4971 return true;
4972
4973 // left intersect: [*** ) B
4974 // [ ***) A
4975 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4976 return true;
4977
4978 return false;
4979}
4980
4981void ResourceSegments::sortAndMerge() {
4982 if (_Intervals.size() <= 1)
4983 return;
4984
4985 // First sort the collection.
4986 _Intervals.sort(sortIntervals);
4987
4988 // We can use std::next because the list has at least 2 elements.
4989 auto next = std::next(std::begin(_Intervals));
4990 auto E = std::end(_Intervals);
4991 for (; next != E; ++next) {
4992 if (std::prev(next)->second >= next->first) {
4993 next->first = std::prev(next)->first;
4994 _Intervals.erase(std::prev(next));
4995 continue;
4996 }
4997 }
4998}
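// Worked example (hypothetical intervals): {[6, 8), [1, 3), [2, 5)} is first
// sorted to {[1, 3), [2, 5), [6, 8)}; since 3 >= 2 the first two overlap and
// are merged into [1, 5), giving {[1, 5), [6, 8)}.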
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
Function Alias Analysis false
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, ArrayRef< InsnRange > Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
PostRA Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA=false)
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the loss " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
SmallVector< SchedRegion, 16 > MBBRegionsVector
static cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PriorityQueue class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Class recording the (high level) value of a variable.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
reverse_iterator rend() const
Definition ArrayRef.h:134
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
reverse_iterator rbegin() const
Definition ArrayRef.h:133
bool test(unsigned Idx) const
Definition BitVector.h:480
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
Register getReg() const
ECValue - The EquivalenceClasses data structure is just a set of these.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy for a scheduling zone, given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Segments::iterator iterator
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
iterator begin()
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endIndex - Return the maximum point of the range of the whole, exclusive.
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool hasValue() const
static LocationSize precise(uint64_t Value)
MachineInstrBundleIterator< const MachineInstr > const_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
Representation of each machine instruction.
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static LLVM_ABI MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI MachineSchedulerPass(const TargetMachine *TM)
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SchedCandidate TopCand
Candidate last picked from Top boundary.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
LLVM_ABI void dump(const TargetRegisterInfo &TRI) const
LLVM_ABI void addPressureChange(VirtRegOrUnit VRegOrUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
void clear()
clear - Erase all elements from the queue.
Helpers for implementing custom MachineSchedStrategy classes.
ArrayRef< SUnit * > elements()
LLVM_ABI void dump() const
std::vector< SUnit * >::iterator iterator
StringRef getName() const
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
LLVM_ABI void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
LLVM_ABI void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
LLVM_ABI void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These functions return the interval used by a resource in bottom and top scheduling.
static LLVM_ABI bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
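As a standalone sketch (not the LLVM implementation, which may treat degenerate intervals differently), the overlap test for the half-open [a, b) intervals used by ResourceSegments reduces to checking that each interval starts before the other ends:

#include <cassert>
#include <cstdint>
#include <utility>

using IntervalTy = std::pair<int64_t, int64_t>;

// Two half-open intervals [a, b) overlap iff each starts before the other ends.
static bool intervalsIntersect(IntervalTy A, IntervalTy B) {
  return A.first < B.second && B.first < A.second;
}

int main() {
  assert(intervalsIntersect({0, 3}, {2, 5}));  // share cycle 2
  assert(!intervalsIntersect({0, 3}, {3, 5})); // touch at 3 only, no overlap
}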
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
bool isWeak() const
Tests if this is a weak dependence.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition ScheduleDAG.h:76
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
@ Weak
Arbitrary weak DAG edge.
Definition ScheduleDAG.h:75
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Register getReg() const
Returns the register associated with this edge.
bool isArtificialDep() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned NumSuccsLeft
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned NumPredsLeft
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
unsigned WeakPredsLeft
bool isBottomReady() const
bool hasPhysRegUses
Has physreg uses.
bool isTopReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
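As a hedged sketch (countDataPreds is a hypothetical helper, not an LLVM API), the SDep and SUnit accessors listed above are typically combined like this to walk a scheduling unit's incoming edges and keep only strong data dependences:

#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

// Count the strong (non-weak) data-dependence predecessors of a node.
static unsigned countDataPreds(const SUnit &SU) {
  unsigned Count = 0;
  for (const SDep &Pred : SU.Preds)
    if (Pred.getKind() == SDep::Data && !Pred.isWeak())
      ++Count;
  return Count;
}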
Each Scheduling boundary is associated with ready queues.
LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
LLVM_ABI void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
bool isReservedGroup(unsigned PIdx) const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
LLVM_ABI void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
LLVM_ABI void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
LLVM_ABI void dumpScheduledState() const
LLVM_ABI void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
Definition ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
A ScheduleDAG for scheduling lists of MachineInstr.
SmallVector< ClusterInfo > & getClusters()
Returns the array of the clusters.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
Dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph(const Twine &Name, const Twine &Title) override
viewGraph - Pop up a ghostview window with the reachable parts of the DAG rendered using 'dot'.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
std::reverse_iterator< const_iterator > const_reverse_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows.
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
const MCWriteProcResEntry * ProcResIter
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
LLVM_ABI unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic scheduling policy within a region.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
A raw_ostream that writes to an std::string.
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
ScheduleDAGMILive * createSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
@ Offset
Definition DWP.cpp:532
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
void stable_sort(R &&Range)
Definition STLExtras.h:2106
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
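As a small usage sketch (allPositive is a hypothetical helper), these range-based wrappers remove the begin()/end() boilerplate of the underlying std:: algorithms:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// True iff every element of the container satisfies the predicate.
static bool allPositive(const SmallVector<int, 4> &Values) {
  return all_of(Values, [](int V) { return V > 0; });
}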
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition Format.h:157
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition LaneBitmask.h:92
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
LLVM_ABI char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
ScheduleDAGMI * createSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
cl::opt< bool > ViewMISchedDAGs
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI cl::opt< bool > VerifyScheduling
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1968
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr unsigned InvalidClusterId
@ Other
Any other memory.
Definition ModRef.h:68
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition Format.h:150
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
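As a hedged sketch (createClusteringSchedLive is a hypothetical name), a target-specific factory commonly builds the standard live scheduler and then appends these clustering mutations before returning it:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Hypothetical factory: standard converging scheduler plus memop clustering.
static ScheduleDAGInstrs *createClusteringSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}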
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
SmallPtrSet< SUnit *, 8 > ClusterInfo
Keep record of which SUnit are in the same cluster group.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &)
LLVM_ABI void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
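As a hedged sketch (preferShorterLatency is a hypothetical helper, not an LLVM API), a heuristic step in a GenericScheduler-style tryCandidate typically funnels its comparison through tryLess or tryGreater, which record the deciding reason on the winning candidate and return true when, as the entry above says, the heuristic determines order:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Prefer the candidate whose node has the smaller latency, recording Stall
// as the deciding reason when the comparison is conclusive.
static bool preferShorterLatency(GenericSchedulerBase::SchedCandidate &Cand,
                                 GenericSchedulerBase::SchedCandidate &TryCand) {
  return tryLess(TryCand.SU->Latency, Cand.SU->Latency, TryCand, Cand,
                 GenericSchedulerBase::Stall);
}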
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI cl::opt< MISched::Direction > PreRADirection
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI void initializePostMachineSchedulerLegacyPass(PassRegistry &)
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
cl::opt< bool > PrintDAGs
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
MachineBlockFrequencyInfo * MBFI
const MachineLoopInfo * MLI
const TargetMachine * TM
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
A region of an MBB for scheduling.
Summarize the unscheduled region.
LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.