//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

STATISTIC(NumInstrsInSourceOrderPreRA,
          "Number of instructions in source order after pre-RA scheduling");
STATISTIC(NumInstrsInSourceOrderPostRA,
          "Number of instructions in source order after post-RA scheduling");
STATISTIC(NumInstrsScheduledPreRA,
          "Number of instructions scheduled by pre-RA scheduler");
STATISTIC(NumInstrsScheduledPostRA,
          "Number of instructions scheduled by post-RA scheduler");
STATISTIC(NumClustered, "Number of load/store pairs clustered");

STATISTIC(NumTopPreRA,
          "Number of scheduling units chosen from top queue pre-RA");
STATISTIC(NumBotPreRA,
          "Number of scheduling units chosen from bottom queue pre-RA");
STATISTIC(NumNoCandPreRA,
          "Number of scheduling units chosen for NoCand heuristic pre-RA");
STATISTIC(NumOnly1PreRA,
          "Number of scheduling units chosen for Only1 heuristic pre-RA");
STATISTIC(NumPhysRegPreRA,
          "Number of scheduling units chosen for PhysReg heuristic pre-RA");
STATISTIC(NumRegExcessPreRA,
          "Number of scheduling units chosen for RegExcess heuristic pre-RA");
STATISTIC(NumRegCriticalPreRA,
          "Number of scheduling units chosen for RegCritical heuristic pre-RA");
STATISTIC(NumStallPreRA,
          "Number of scheduling units chosen for Stall heuristic pre-RA");
STATISTIC(NumClusterPreRA,
          "Number of scheduling units chosen for Cluster heuristic pre-RA");
STATISTIC(NumWeakPreRA,
          "Number of scheduling units chosen for Weak heuristic pre-RA");
STATISTIC(NumRegMaxPreRA,
          "Number of scheduling units chosen for RegMax heuristic pre-RA");
STATISTIC(
    NumResourceReducePreRA,
    "Number of scheduling units chosen for ResourceReduce heuristic pre-RA");
STATISTIC(
    NumResourceDemandPreRA,
    "Number of scheduling units chosen for ResourceDemand heuristic pre-RA");
STATISTIC(
    NumTopDepthReducePreRA,
    "Number of scheduling units chosen for TopDepthReduce heuristic pre-RA");
STATISTIC(
    NumTopPathReducePreRA,
    "Number of scheduling units chosen for TopPathReduce heuristic pre-RA");
STATISTIC(
    NumBotHeightReducePreRA,
    "Number of scheduling units chosen for BotHeightReduce heuristic pre-RA");
STATISTIC(
    NumBotPathReducePreRA,
    "Number of scheduling units chosen for BotPathReduce heuristic pre-RA");
STATISTIC(NumNodeOrderPreRA,
          "Number of scheduling units chosen for NodeOrder heuristic pre-RA");
STATISTIC(NumFirstValidPreRA,
          "Number of scheduling units chosen for FirstValid heuristic pre-RA");

STATISTIC(NumTopPostRA,
          "Number of scheduling units chosen from top queue post-RA");
STATISTIC(NumBotPostRA,
          "Number of scheduling units chosen from bottom queue post-RA");
STATISTIC(NumNoCandPostRA,
          "Number of scheduling units chosen for NoCand heuristic post-RA");
STATISTIC(NumOnly1PostRA,
          "Number of scheduling units chosen for Only1 heuristic post-RA");
STATISTIC(NumPhysRegPostRA,
          "Number of scheduling units chosen for PhysReg heuristic post-RA");
STATISTIC(NumRegExcessPostRA,
          "Number of scheduling units chosen for RegExcess heuristic post-RA");
STATISTIC(
    NumRegCriticalPostRA,
    "Number of scheduling units chosen for RegCritical heuristic post-RA");
STATISTIC(NumStallPostRA,
          "Number of scheduling units chosen for Stall heuristic post-RA");
STATISTIC(NumClusterPostRA,
          "Number of scheduling units chosen for Cluster heuristic post-RA");
STATISTIC(NumWeakPostRA,
          "Number of scheduling units chosen for Weak heuristic post-RA");
STATISTIC(NumRegMaxPostRA,
          "Number of scheduling units chosen for RegMax heuristic post-RA");
STATISTIC(
    NumResourceReducePostRA,
    "Number of scheduling units chosen for ResourceReduce heuristic post-RA");
STATISTIC(
    NumResourceDemandPostRA,
    "Number of scheduling units chosen for ResourceDemand heuristic post-RA");
STATISTIC(
    NumTopDepthReducePostRA,
    "Number of scheduling units chosen for TopDepthReduce heuristic post-RA");
STATISTIC(
    NumTopPathReducePostRA,
    "Number of scheduling units chosen for TopPathReduce heuristic post-RA");
STATISTIC(
    NumBotHeightReducePostRA,
    "Number of scheduling units chosen for BotHeightReduce heuristic post-RA");
STATISTIC(
    NumBotPathReducePostRA,
    "Number of scheduling units chosen for BotPathReduce heuristic post-RA");
STATISTIC(NumNodeOrderPostRA,
          "Number of scheduling units chosen for NodeOrder heuristic post-RA");
STATISTIC(NumFirstValidPostRA,
          "Number of scheduling units chosen for FirstValid heuristic post-RA");

static cl::opt<MISched::Direction> PreRADirection(
    "misched-prera-direction", cl::Hidden,
    cl::desc("Pre reg-alloc list scheduling direction"),
    cl::init(MISched::Unspecified),
    cl::values(
        clEnumValN(MISched::TopDown, "topdown",
                   "Force top-down pre reg-alloc list scheduling"),
        clEnumValN(MISched::BottomUp, "bottomup",
                   "Force bottom-up pre reg-alloc list scheduling"),
        clEnumValN(MISched::Bidirectional, "bidirectional",
                   "Force bidirectional pre reg-alloc list scheduling")));

static cl::opt<MISched::Direction> PostRADirection(
    "misched-postra-direction", cl::Hidden,
    cl::desc("Post reg-alloc list scheduling direction"),
    cl::init(MISched::Unspecified),
    cl::values(
        clEnumValN(MISched::TopDown, "topdown",
                   "Force top-down post reg-alloc list scheduling"),
        clEnumValN(MISched::BottomUp, "bottomup",
                   "Force bottom-up post reg-alloc list scheduling"),
        clEnumValN(MISched::Bidirectional, "bidirectional",
                   "Force bidirectional post reg-alloc list scheduling")));

static cl::opt<bool>
    DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                           cl::desc("Print critical path length to stdout"));

static cl::opt<bool> VerifyScheduling(
    "verify-misched", cl::Hidden,
    cl::desc("Verify machine instrs before and after machine scheduling"));

#ifndef NDEBUG
cl::opt<bool> llvm::ViewMISchedDAGs(
    "view-misched-dags", cl::Hidden,
    cl::desc("Pop up a window to show MISched dags after they are processed"));
cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden,
                              cl::desc("Print schedule DAGs"));
static cl::opt<bool> MISchedDumpReservedCycles(
    "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
    cl::desc("Dump resource usage at schedule boundary."));
static cl::opt<bool> MischedDetailResourceBooking(
    "misched-detail-resource-booking", cl::Hidden, cl::init(false),
    cl::desc("Show details of invoking getNextResourceCycle."));
#else
const bool llvm::ViewMISchedDAGs = false;
const bool llvm::PrintDAGs = false;
static const bool MischedDetailResourceBooking = false;
#ifdef LLVM_ENABLE_DUMP
static const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
#endif // NDEBUG

#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
    cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
    cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
    cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
    cl::desc("Only schedule this MBB#"));
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
    cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
    cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
    cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));
static cl::opt<bool>
    ForceFastCluster("force-fast-cluster", cl::Hidden,
                     cl::desc("Switch to fast cluster algorithm with the loss "
                              "of some fusion opportunities"),
                     cl::init(false));
static cl::opt<unsigned>
    FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
                         cl::desc("The threshold for fast cluster"),
                         cl::init(1000));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static cl::opt<bool> MISchedDumpScheduleTrace(
    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
    cl::desc("Dump resource usage at schedule boundary."));
static cl::opt<unsigned>
    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
                   cl::desc("Set width of the columns with "
                            "the resources and schedule units"),
                   cl::init(19));
static cl::opt<unsigned>
    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
             cl::desc("Set width of the columns showing resource booking."),
             cl::init(5));
static cl::opt<bool> MISchedSortResourcesInTrace(
    "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
    cl::desc("Sort the resources printed in the dump trace"));
#endif

static cl::opt<unsigned>
    MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
                     cl::desc("Number of intervals to track"), cl::init(10));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() { delete RegClassInfo; }

namespace llvm {
namespace impl_detail {

/// Base class for the machine scheduler classes.
class MachineSchedulerBase : public MachineSchedContext {
protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// Impl class for MachineScheduler.
class MachineSchedulerImpl : public MachineSchedulerBase {
  // These are only for using MF.verify()
  // remove when verify supports passing in all analyses
  MachineFunctionPass *P = nullptr;
  MachineFunctionAnalysisManager *MFAM = nullptr;

public:
  struct RequiredAnalyses {
    MachineLoopInfo &MLI;
    MachineDominatorTree &MDT;
    AAResults &AA;
    LiveIntervals &LIS;
    MachineBlockFrequencyInfo &MBFI;
  };

  // Migration only
  void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
  void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }

  bool run(MachineFunction &MF, const TargetMachine &TM,
           const RequiredAnalyses &Analyses);

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// Impl class for PostMachineScheduler.
class PostMachineSchedulerImpl : public MachineSchedulerBase {
  // These are only for using MF.verify()
  // remove when verify supports passing in all analyses
  MachineFunctionPass *P = nullptr;
  MachineFunctionAnalysisManager *MFAM = nullptr;

public:
  struct RequiredAnalyses {
    MachineLoopInfo &MLI;
    AAResults &AA;
  };

  // Migration only
  void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
  void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }

  bool run(MachineFunction &Func, const TargetMachine &TM,
           const RequiredAnalyses &Analyses);

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // namespace impl_detail
} // namespace llvm

using impl_detail::MachineSchedulerImpl;
using impl_detail::PostMachineSchedulerImpl;

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineSchedulerLegacy : public MachineFunctionPass {
  MachineSchedulerImpl Impl;

public:
  MachineSchedulerLegacy();
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &) override;

  static char ID; // Class identification, replacement for typeinfo
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineSchedulerLegacy : public MachineFunctionPass {
  PostMachineSchedulerImpl Impl;

public:
  PostMachineSchedulerLegacy();
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &) override;

  static char ID; // Class identification, replacement for typeinfo
};

} // end anonymous namespace

char MachineSchedulerLegacy::ID = 0;

char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;

INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {}

void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexesWrapperPass>();
  AU.addPreserved<SlotIndexesWrapperPass>();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addPreserved<LiveIntervalsWrapperPass>();
  AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineSchedulerLegacy::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;

INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
                      "PostRA Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
                    "PostRA Machine Instruction Scheduler", false, false)

PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
    : MachineFunctionPass(ID) {}

void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
    MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
    MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden,
                    cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
    DefaultSchedRegistry("default",
                         "Use the target's default scheduler choice.",
                         useDefaultMachineSched);

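// Illustrative sketch (not part of the original file): an out-of-tree target
// or plugin can make its own strategy selectable with -misched=<name> by
// registering a factory. "createCustomSched" and "CustomSchedRegistry" are
// hypothetical names; MachineSchedRegistry, ScheduleDAGMILive, and
// GenericScheduler are the real APIs used elsewhere in this file.
//
//   static ScheduleDAGInstrs *createCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//       CustomSchedRegistry("custom", "Run my custom strategy.",
//                           createCustomSched);
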
static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugOrPseudoInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugOrPseudoInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineSchedulerImpl::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createSchedLive(this);
}
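
// Illustrative target-side hook (assumed target "Foo"; not from this file):
// a backend typically supplies the default scheduler returned above by
// overriding TargetMachine::createMachineScheduler, for example to attach
// DAG mutations such as load clustering:
//
//   ScheduleDAGInstrs *
//   FooTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }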

bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM,
                               const RequiredAnalyses &Analyses) {
  MF = &Func;
  MLI = &Analyses.MLI;
  MDT = &Analyses.MDT;
  this->TM = &TM;
  AA = &Analyses.AA;
  LIS = &Analyses.LIS;
  MBFI = &Analyses.MBFI;

  if (VerifyScheduling) {
    LLVM_DEBUG(LIS->dump());
    const char *MSchedBanner = "Before machine scheduling.";
    if (P)
      MF->verify(P, MSchedBanner, &errs());
    else
      MF->verify(*MFAM, MSchedBanner, &errs());
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  LLVM_DEBUG(LIS->dump());
  if (VerifyScheduling) {
    const char *MSchedBanner = "After machine scheduling.";
    if (P)
      MF->verify(P, MSchedBanner, &errs());
    else
      MF->verify(*MFAM, MSchedBanner, &errs());
  }
  return true;
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineSchedulerImpl::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createSchedPostRA(this);
}

bool PostMachineSchedulerImpl::run(MachineFunction &Func,
                                   const TargetMachine &TM,
                                   const RequiredAnalyses &Analyses) {
  MF = &Func;
  MLI = &Analyses.MLI;
  this->TM = &TM;
  AA = &Analyses.AA;

  if (VerifyScheduling) {
    const char *PostMSchedBanner = "Before post machine scheduling.";
    if (P)
      MF->verify(P, PostMSchedBanner, &errs());
    else
      MF->verify(*MFAM, PostMSchedBanner, &errs());
  }

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling) {
    const char *PostMSchedBanner = "After post machine scheduling.";
    if (P)
      MF->verify(P, PostMSchedBanner, &errs());
    else
      MF->verify(*MFAM, PostMSchedBanner, &errs());
  }
  return true;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!MF.getSubtarget().enableMachineScheduler()) {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));

  auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto &MBFI = getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  Impl.setLegacyPass(this);
  return Impl.run(MF, TM, {MLI, MDT, AA, LIS, MBFI});
}

MachineSchedulerPass::MachineSchedulerPass(const TargetMachine *TM)
    : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
MachineSchedulerPass::MachineSchedulerPass(MachineSchedulerPass &&Other) =
    default;
MachineSchedulerPass::~MachineSchedulerPass() = default;

PostMachineSchedulerPass::PostMachineSchedulerPass(const TargetMachine *TM)
    : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
PostMachineSchedulerPass::PostMachineSchedulerPass(
    PostMachineSchedulerPass &&Other) = default;
PostMachineSchedulerPass::~PostMachineSchedulerPass() = default;

PreservedAnalyses
MachineSchedulerPass::run(MachineFunction &MF,
                          MachineFunctionAnalysisManager &MFAM) {
  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return PreservedAnalyses::all();
  } else if (!MF.getSubtarget().enableMachineScheduler()) {
    return PreservedAnalyses::all();
  }

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
  auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
  auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager();
  auto &AA = FAM.getResult<AAManager>(MF.getFunction());
  auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);

  Impl->setMFAM(&MFAM);
  bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS, MBFI});
  if (!Changed)
    return PreservedAnalyses::all();

  return getMachineFunctionPassPreservedAnalyses()
      .preserveSet<CFGAnalyses>()
      .preserve<SlotIndexesAnalysis>()
      .preserve<LiveIntervalsAnalysis>();
}

bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
  auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  Impl.setLegacyPass(this);
  return Impl.run(MF, TM, {MLI, AA});
}

PreservedAnalyses
PostMachineSchedulerPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return PreservedAnalyses::all();
  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return PreservedAnalyses::all();
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
  auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager();
  auto &AA = FAM.getResult<AAManager>(MF.getFunction());

  Impl->setMFAM(&MFAM);
  bool Changed = Impl->run(MF, *TM, {MLI, AA});
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB, MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
         MI->isFakeUse();
}
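
// For example (illustrative): in a block containing
//   t0 = LOAD; t1 = ADD; CALL @f; t2 = MUL; STORE
// the CALL is a boundary, so getSchedRegions() below forms two regions,
// [LOAD, ADD] and [MUL, STORE], which are scheduled independently.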

using MBBRegionsVector = SmallVector<SchedRegion, 16>;

static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for (MachineBasicBlock::iterator RegionEnd = MBB->end();
       RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (; I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugOrPseudoInstr()) {
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
      }
    }

    // It's possible we found a scheduling region that only has debug
    // instructions. Don't bother scheduling these.
    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences() &&
        (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // are then processed top-down if the scheduler requests it via
    // doMBBSchedRegionsTopDown().
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    bool ScheduleSingleMI = Scheduler.shouldScheduleSingleMIRegions();
    for (const SchedRegion &R : MBBRegions) {
      MachineBasicBlock::iterator I = R.RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
      unsigned NumRegionInstrs = R.NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions and, conditionally, regions with a
      // single MI.
      if (I == RegionEnd || (!ScheduleSingleMI && I == std::prev(RegionEnd))) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
                        << " " << MBB->getName() << "\n  From: " << *I
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End\n";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
/// after crossing a scheduling boundary. [begin, end) includes all instructions
/// in the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs) {
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);

  // Set dump direction after initializing sched policy.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (SchedImpl->getPolicy().OnlyTopDown)
    DumpDir = DumpDirection::TopDown;
  else if (SchedImpl->getPolicy().OnlyBottomUp)
    DumpDir = DumpDirection::BottomUp;
  else
    DumpDir = DumpDirection::Bidirectional;
#endif
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
    MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals.
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// Per-region scheduling driver, called back from
/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
/// that does not consider liveness or register pressure. It is useful for
/// PostRA scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  postProcessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    if (!checkSchedLimit())
      break;

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
          priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
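
// A minimal sketch of the MachineSchedStrategy contract the loop above
// exercises (hypothetical strategy; method set abbreviated from
// MachineScheduler.h):
//
//   struct SketchStrategy : MachineSchedStrategy {
//     void initialize(ScheduleDAGMI *DAG) override { /* cache the DAG */ }
//     void registerRoots() override {}
//     SUnit *pickNode(bool &IsTopNode) override {
//       // Return the next SUnit and set IsTopNode; nullptr ends the loop.
//       return nullptr;
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//     void releaseTopNode(SUnit *SU) override {}
//     void releaseBottomNode(SUnit *SU) override {}
//   };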

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postProcessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit *> TopRoots,
                               ArrayRef<SUnit *> BotRoots) {
  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
           I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
           DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(std::next(OrigPrevMI), BB, DbgValue);
    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
      RegionEnd = DbgValue;
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are zero or one instructions.
  if (BB->size() < 2)
    return;

  dbgs() << " * Schedule table (TopDown):\n";
  dbgs() << scheduleTableLegend << "\n";
  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU)
      continue;
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
        LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
    }
  }
  // Print the header with the cycles.
  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
  dbgs() << "|\n";

  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU) {
      dbgs() << "Missing SUnit\n";
      continue;
    }
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
    unsigned C = FirstCycle;
    for (; C <= LastCycle; ++C) {
      if (C == SU->TopReadyCycle)
        dbgs() << llvm::left_justify("| i", ColWidth);
      else
        dbgs() << llvm::left_justify("|", ColWidth);
    }
    dbgs() << "|\n";
    const MCSchedClassDesc *SC = getSchedClass(SU);

    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
        make_range(SchedModel.getWriteProcResBegin(SC),
                   SchedModel.getWriteProcResEnd(SC)));

    if (MISchedSortResourcesInTrace)
      llvm::stable_sort(
          ResourcesIt,
          [](const MCWriteProcResEntry &LHS,
             const MCWriteProcResEntry &RHS) -> bool {
            return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
                   std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
          });
    for (const MCWriteProcResEntry &PI : ResourcesIt) {
      C = FirstCycle;
      const std::string ResName =
          SchedModel.getResourceName(PI.ProcResourceIdx);
      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
      for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
           ++I, ++C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C++ <= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Place end char.
      dbgs() << "| \n";
    }
  }
}

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are zero or one instructions.
  if (BB->size() < 2)
    return;

  dbgs() << " * Schedule table (BottomUp):\n";
  dbgs() << scheduleTableLegend << "\n";

  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU)
      continue;
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
        LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
    }
  }
  // Print the header with the cycles.
  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
  for (int C = FirstCycle; C >= LastCycle; --C)
    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
  dbgs() << "|\n";

  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU) {
      dbgs() << "Missing SUnit\n";
      continue;
    }
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
    int C = FirstCycle;
    for (; C >= LastCycle; --C) {
      if (C == (int)SU->BotReadyCycle)
        dbgs() << llvm::left_justify("| i", ColWidth);
      else
        dbgs() << llvm::left_justify("|", ColWidth);
    }
    dbgs() << "|\n";
    const MCSchedClassDesc *SC = getSchedClass(SU);
    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
        make_range(SchedModel.getWriteProcResBegin(SC),
                   SchedModel.getWriteProcResEnd(SC)));

    if (MISchedSortResourcesInTrace)
      llvm::stable_sort(
          ResourcesIt,
          [](const MCWriteProcResEntry &LHS,
             const MCWriteProcResEntry &RHS) -> bool {
            return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
                   std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
          });
    for (const MCWriteProcResEntry &PI : ResourcesIt) {
      C = FirstCycle;
      const std::string ResName =
          SchedModel.getResourceName(PI.ProcResourceIdx);
      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
      for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
           ++I, --C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C-- >= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Place end char.
      dbgs() << "| \n";
    }
  }
}
#endif
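
// The tables printed above look roughly like this (illustrative output: one
// header row of cycles, an issue row per SUnit, and one booking row per
// consumed resource; "FooUnit" is a made-up resource name):
//
//   Cycle              | 0  | 1  | 2  |
//   SU(0)              | i  |    |    |
//            FooUnit   | x  | x  |    |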

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  if (MISchedDumpScheduleTrace) {
    if (DumpDir == DumpDirection::TopDown)
      dumpScheduleTraceTopDown();
    else if (DumpDir == DumpDirection::BottomUp)
      dumpScheduleTraceBottomUp();
    else if (DumpDir == DumpDirection::Bidirectional) {
      dbgs() << "* Schedule table (Bidirectional): not implemented\n";
    } else {
      dbgs() << "* Schedule table: DumpDirection not set.\n";
    }
  }

  for (MachineInstr &MI : *this) {
    if (SUnit *SU = getSUnit(&MI))
      dumpNode(*SU);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (!MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool FoundDef = false;
      for (const MachineOperand &MO2 : MI.all_defs()) {
        if (MO2.getReg() == Reg && !MO2.isDead()) {
          FoundDef = true;
          break;
        }
      }
      if (FoundDef)
        continue;
    }

    // Record this local VReg use.
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}

// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
  VRegUses.clear();
  VRegUses.setUniverse(MRI.getNumVirtRegs());
  for (SUnit &SU : SUnits)
    collectVRegUses(SU);

  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
                    ShouldTrackLaneMasks, false);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                    ShouldTrackLaneMasks, false);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  LLVM_DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    LLVM_DEBUG(dbgs() << "Live Thru: ";
               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  };

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<VRegMaskOrUnit, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  LLVM_DEBUG(dbgs() << "Top Pressure: ";
             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
             dbgs() << "Bottom Pressure: ";
             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););

  assert((BotRPTracker.getPos() == RegionEnd ||
          (RegionEnd->isDebugInstr() &&
           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
         "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also
  // track the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
      RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
                        << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  LLVM_DEBUG({
    if (RegionCriticalPSets.size() > 0) {
      dbgs() << "Excess PSets: ";
      for (const PressureChange &RCPS : RegionCriticalPSets)
        dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
      dbgs() << "\n";
    }
  });
}
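
// Worked example (illustrative numbers): if this region's max pressure for
// some pressure set is 19 while RegClassInfo reports a limit of 16, that set
// is recorded in RegionCriticalPSets above, and candidates that reduce its
// pressure are later preferred (the RegCritical reason in the generic
// strategy's tryCandidate).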

void ScheduleDAGMILive::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (const PressureChange &PC : PDiff) {
    if (!PC.isValid())
      break;
    unsigned ID = PC.getPSet();
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() &&
          NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
                        << NewMaxPressure[ID]
                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
                        << " livethru)\n");
    }
  }
}

/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMILive::updatePressureDiffs(
    ArrayRef<VRegMaskOrUnit> LiveUses) {
  for (const VRegMaskOrUnit &P : LiveUses) {
    /// FIXME: Currently assuming single-use physregs.
    if (!P.VRegOrUnit.isVirtualReg())
      continue;
    Register Reg = P.VRegOrUnit.asVirtualReg();

    if (ShouldTrackLaneMasks) {
      // If the register has just become live then other uses won't change
      // this fact anymore => decrement pressure.
      // If the register has just become dead then other uses make it come
      // back to life => increment pressure.
      bool Decrement = P.LaneMask.any();

      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit &SU = *V2SU.SU;
        if (SU.isScheduled || &SU == &ExitSU)
          continue;

        PressureDiff &PDiff = getPressureDiff(&SU);
        PDiff.addPressureChange(VirtRegOrUnit(Reg), Decrement, &MRI);
        if (llvm::any_of(PDiff, [](const PressureChange &Change) {
              return Change.isValid();
            }))
          LLVM_DEBUG(dbgs()
                         << "  UpdateRegPressure: SU(" << SU.NodeNum << ") "
                         << printReg(Reg, TRI) << ':'
                         << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
                     dbgs() << "      to "; PDiff.dump(*TRI););
      }
    } else {
      assert(P.LaneMask.any());
      LLVM_DEBUG(dbgs() << "  LiveReg: " << printReg(Reg, TRI) << "\n");
      // This may be called before CurrentBottom has been initialized. However,
      // BotRPTracker must have a valid position. We want the value live into
      // the instruction or live out of the block, so ask for the previous
      // instruction's live-out.
      const LiveInterval &LI = LIS->getInterval(Reg);
      VNInfo *VNI;
      MachineBasicBlock::const_iterator I =
          nextIfDebug(BotRPTracker.getPos(), BB->end());
      if (I == BB->end())
        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
      else {
        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
        VNI = LRQ.valueIn();
      }
      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
      assert(VNI && "No live value at use.");
      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit *SU = V2SU.SU;
        // If this use comes before the reaching def, it cannot be a last use,
        // so decrease its pressure change.
        if (!SU->isScheduled && SU != &ExitSU) {
          LiveQueryResult LRQ =
              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
          if (LRQ.valueIn() == VNI) {
            PressureDiff &PDiff = getPressureDiff(SU);
            PDiff.addPressureChange(VirtRegOrUnit(Reg), true, &MRI);
            if (llvm::any_of(PDiff, [](const PressureChange &Change) {
                  return Change.isValid();
                }))
              LLVM_DEBUG(dbgs() << "  UpdateRegPressure: SU(" << SU->NodeNum
                                << ") " << *SU->getInstr();
                         dbgs() << "      to ";
                         PDiff.dump(*TRI););
          }
        }
      }
    }
  }
}

void ScheduleDAGMILive::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits) {
    dumpNodeAll(SU);
    if (ShouldTrackPressure) {
      dbgs() << "  Pressure Diff      : ";
      getPressureDiff(&SU).dump(*TRI);
    }
    dbgs() << "  Single Issue       : ";
    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
        SchedModel.mustEndGroup(SU.getInstr()))
      dbgs() << "true;";
    else
      dbgs() << "false;";
    dbgs() << '\n';
  }
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());
  buildDAGWithRegPressure();

  postProcessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    if (!checkSchedLimit())
      break;

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");

    scheduleMI(SU, IsTopNode);

    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMILive::buildDAGWithRegPressure() {
  if (!ShouldTrackPressure) {
    RPTracker.reset();
    RegionCriticalPSets.clear();
    buildSchedGraph(AA);
    return;
  }

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

void ScheduleDAGMILive::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}

/// Compute the max cyclic critical path through the DAG. The scheduling DAG
/// only provides the critical path for single block loops. To handle loops that
/// span blocks, we could use the vreg path latencies provided by
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
/// available for use in the scheduler.
///
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is two cycles: b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single-block loops.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  for (const VRegMaskOrUnit &P : RPTracker.getPressure().LiveOutRegs) {
    if (!P.VRegOrUnit.isVirtualReg())
      continue;
    Register Reg = P.VRegOrUnit.asVirtualReg();
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (const VReg2SUnit &V2SU
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
      SUnit *SU = V2SU.SU;
      if (SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > SU->getDepth())
        CyclicLatency = LiveOutDepth - SU->getDepth();

      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      } else
        CyclicLatency = 0;

      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}

1871/// Release ExitSU predecessors and setup scheduler queues. Re-position
1872/// the Top RP tracker in case the region beginning has changed.
1873void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1874 ArrayRef<SUnit*> BotRoots) {
1875 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1876 if (ShouldTrackPressure) {
1877 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1878 TopRPTracker.setPos(CurrentTop);
1879 }
1880}
1881
1882/// Move an instruction and update register pressure.
1883void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1884 // Move the instruction to its new location in the instruction stream.
1885 MachineInstr *MI = SU->getInstr();
1886
1887 if (IsTopNode) {
1888 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1889 if (&*CurrentTop == MI)
1890 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1891 else {
1892 moveInstruction(MI, CurrentTop);
1893 TopRPTracker.setPos(MI);
1894 }
1895
1896 if (ShouldTrackPressure) {
1897 // Update top scheduled pressure.
1898 RegisterOperands RegOpers;
1899 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1900 /*IgnoreDead=*/false);
1901 if (ShouldTrackLaneMasks) {
1902 // Adjust liveness and add missing dead+read-undef flags.
1903 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1904 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1905 } else {
1906 // Adjust for missing dead-def flags.
1907 RegOpers.detectDeadDefs(*MI, *LIS);
1908 }
1909
1910 TopRPTracker.advance(RegOpers);
1911 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1912 LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
1913 TopRPTracker.getRegSetPressureAtPos(), TRI););
1914
1915 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1916 }
1917 } else {
1918 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1919 MachineBasicBlock::iterator priorII =
1920 priorNonDebug(CurrentBottom, CurrentTop);
1921 if (&*priorII == MI)
1922 CurrentBottom = priorII;
1923 else {
1924 if (&*CurrentTop == MI) {
1925 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1926 TopRPTracker.setPos(CurrentTop);
1927 }
1928 moveInstruction(MI, CurrentBottom);
1929 CurrentBottom = MI;
1930 BotRPTracker.setPos(CurrentBottom);
1931 }
1932 if (ShouldTrackPressure) {
1933 RegisterOperands RegOpers;
1934 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1935 /*IgnoreDead=*/false);
1936 if (ShouldTrackLaneMasks) {
1937 // Adjust liveness and add missing dead+read-undef flags.
1938 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1939 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1940 } else {
1941 // Adjust for missing dead-def flags.
1942 RegOpers.detectDeadDefs(*MI, *LIS);
1943 }
1944
1945 if (BotRPTracker.getPos() != CurrentBottom)
1946 BotRPTracker.recedeSkipDebugValues();
1947 SmallVector<VRegMaskOrUnit, 8> LiveUses;
1948 BotRPTracker.recede(RegOpers, &LiveUses);
1949 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1950 LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
1951 BotRPTracker.getRegSetPressureAtPos(), TRI););
1952
1953 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1954 updatePressureDiffs(LiveUses);
1955 }
1956 }
1957}
1958
1959//===----------------------------------------------------------------------===//
1960// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1961//===----------------------------------------------------------------------===//
1962
1963namespace {
1964
1965/// Post-process the DAG to create cluster edges between neighboring
1966/// loads or between neighboring stores.
1967class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1968 struct MemOpInfo {
1969 SUnit *SU;
1970 SmallVector<const MachineOperand *, 4> BaseOps;
1971 int64_t Offset;
1972 LocationSize Width;
1973 bool OffsetIsScalable;
1974
1975 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1976 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1977 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1978 OffsetIsScalable(OffsetIsScalable) {}
1979
1980 static bool Compare(const MachineOperand *const &A,
1981 const MachineOperand *const &B) {
1982 if (A->getType() != B->getType())
1983 return A->getType() < B->getType();
1984 if (A->isReg())
1985 return A->getReg() < B->getReg();
1986 if (A->isFI()) {
1987 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1988 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1989 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1990 TargetFrameLowering::StackGrowsDown;
1991 return StackGrowsDown ? A->getIndex() > B->getIndex()
1992 : A->getIndex() < B->getIndex();
1993 }
1994
1995 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1996 "index bases.");
1997 }
1998
1999 bool operator<(const MemOpInfo &RHS) const {
2000 // FIXME: Don't compare everything twice. Maybe use C++20 three way
2001 // comparison instead when it's available.
2002 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
2003 RHS.BaseOps.begin(), RHS.BaseOps.end(),
2004 Compare))
2005 return true;
2006 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
2007 BaseOps.begin(), BaseOps.end(), Compare))
2008 return false;
2009 if (Offset != RHS.Offset)
2010 return Offset < RHS.Offset;
2011 return SU->NodeNum < RHS.SU->NodeNum;
2012 }
2013 };
2014
2015 const TargetInstrInfo *TII;
2016 const TargetRegisterInfo *TRI;
2017 bool IsLoad;
2018 bool ReorderWhileClustering;
2019
2020public:
2021 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
2022 const TargetRegisterInfo *tri, bool IsLoad,
2023 bool ReorderWhileClustering)
2024 : TII(tii), TRI(tri), IsLoad(IsLoad),
2025 ReorderWhileClustering(ReorderWhileClustering) {}
2026
2027 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2028
2029protected:
2030 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
2031 ScheduleDAGInstrs *DAG);
2032 void collectMemOpRecords(std::vector<SUnit> &SUnits,
2033 SmallVectorImpl<MemOpInfo> &MemOpRecords);
2034 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2035 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
2036};
2037
2038class StoreClusterMutation : public BaseMemOpClusterMutation {
2039public:
2040 StoreClusterMutation(const TargetInstrInfo *tii,
2041 const TargetRegisterInfo *tri,
2042 bool ReorderWhileClustering)
2043 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
2044};
2045
2046class LoadClusterMutation : public BaseMemOpClusterMutation {
2047public:
2048 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
2049 bool ReorderWhileClustering)
2050 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
2051};
2052
2053} // end anonymous namespace
2054
2055std::unique_ptr<ScheduleDAGMutation>
2056llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII,
2057 const TargetRegisterInfo *TRI,
2058 bool ReorderWhileClustering) {
2059 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
2060 TII, TRI, ReorderWhileClustering)
2061 : nullptr;
2062}
2063
2064std::unique_ptr<ScheduleDAGMutation>
2065llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII,
2066 const TargetRegisterInfo *TRI,
2067 bool ReorderWhileClustering) {
2068 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
2069 TII, TRI, ReorderWhileClustering)
2070 : nullptr;
2071}
2072
2073// Sort all the loads/stores first. Then, for each load/store, check the
2074// following loads/stores one by one until the first non-dependent one is
2075// reached, and call the target hook to see if they can be clustered.
2076// If FastCluster is enabled, we assume that all the loads/stores have been
2077// preprocessed and no longer have dependencies on each other.
2078void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2079 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
2080 ScheduleDAGInstrs *DAG) {
2081 // Keep track of the current cluster length and bytes for each SUnit.
2082 DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
2083 EquivalenceClasses<SUnit *> Clusters;
2084
2085 // At this point, `MemOpRecords` array must hold at least two mem ops. Try to
2086 // cluster mem ops collected within `MemOpRecords` array.
2087 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
2088 // The decision to cluster mem ops is taken based on target-dependent logic.
2089 auto MemOpa = MemOpRecords[Idx];
2090
2091 // Seek for the next load/store to do the cluster.
2092 unsigned NextIdx = Idx + 1;
2093 for (; NextIdx < End; ++NextIdx)
2094 // Skip if MemOpb has been clustered already or has dependency with
2095 // MemOpa.
2096 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
2097 (FastCluster ||
2098 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
2099 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
2100 break;
2101 if (NextIdx == End)
2102 continue;
2103
2104 auto MemOpb = MemOpRecords[NextIdx];
2105 unsigned ClusterLength = 2;
2106 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
2107 MemOpb.Width.getValue().getKnownMinValue();
2108 auto It = SUnit2ClusterInfo.find(MemOpa.SU->NodeNum);
2109 if (It != SUnit2ClusterInfo.end()) {
2110 const auto &[Len, Bytes] = It->second;
2111 ClusterLength = Len + 1;
2112 CurrentClusterBytes = Bytes + MemOpb.Width.getValue().getKnownMinValue();
2113 }
2114
2115 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
2116 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
2117 MemOpb.Offset, MemOpb.OffsetIsScalable,
2118 ClusterLength, CurrentClusterBytes))
2119 continue;
2120
2121 SUnit *SUa = MemOpa.SU;
2122 SUnit *SUb = MemOpb.SU;
2123
2124 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
2125 std::swap(SUa, SUb);
2126
2127 // FIXME: Is this check really required?
2128 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
2129 continue;
2130
2131 Clusters.unionSets(SUa, SUb);
2132 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
2133 << SUb->NodeNum << ")\n");
2134 ++NumClustered;
2135
2136 if (IsLoad) {
2137 // Copy successor edges from SUa to SUb. Interleaving computation
2138 // dependent on SUa can prevent load combining due to register reuse.
2139 // Predecessor edges do not need to be copied from SUb to SUa since
2140 // nearby loads should have effectively the same inputs.
2141 for (const SDep &Succ : SUa->Succs) {
2142 if (Succ.getSUnit() == SUb)
2143 continue;
2144 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
2145 << ")\n");
2146 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
2147 }
2148 } else {
2149 // Copy predecessor edges from SUb to SUa to avoid the SUnits that
2150 // SUb depends on being scheduled in-between SUb and SUa. Successor
2151 // edges do not need to be copied from SUa to SUb since no one will
2152 // depend on stores.
2153 // Note that we don't need to care about memory dependencies: we won't
2154 // try to cluster mem ops that have any memory dependency on each other.
2155 for (const SDep &Pred : SUb->Preds) {
2156 if (Pred.getSUnit() == SUa)
2157 continue;
2158 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
2159 << ")\n");
2160 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
2161 }
2162 }
2163
2164 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
2165 CurrentClusterBytes};
2166
2167 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
2168 << ", Curr cluster bytes: " << CurrentClusterBytes
2169 << "\n");
2170 }
2171
2172 // Add cluster group information.
2173 // Iterate over all of the equivalence sets.
2174 auto &AllClusters = DAG->getClusters();
2175 for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) {
2176 if (!I->isLeader())
2177 continue;
2178 ClusterInfo Group;
2179 unsigned ClusterIdx = AllClusters.size();
2180 for (SUnit *MemberI : Clusters.members(*I)) {
2181 MemberI->ParentClusterIdx = ClusterIdx;
2182 Group.insert(MemberI);
2183 }
2184 AllClusters.push_back(Group);
2185 }
2186}
2187
2188void BaseMemOpClusterMutation::collectMemOpRecords(
2189 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
2190 for (auto &SU : SUnits) {
2191 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
2192 (!IsLoad && !SU.getInstr()->mayStore()))
2193 continue;
2194
2195 const MachineInstr &MI = *SU.getInstr();
2196 SmallVector<const MachineOperand *, 4> BaseOps;
2197 int64_t Offset;
2198 bool OffsetIsScalable;
2199 LocationSize Width = LocationSize::precise(0);
2200 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
2201 OffsetIsScalable, Width, TRI)) {
2202 if (!Width.hasValue())
2203 continue;
2204
2205 MemOpRecords.push_back(
2206 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
2207
2208 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
2209 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
2210 << ", Width: " << Width << "\n");
2211 }
2212#ifndef NDEBUG
2213 for (const auto *Op : BaseOps)
2214 assert(Op);
2215#endif
2216 }
2217}
2218
2219bool BaseMemOpClusterMutation::groupMemOps(
2220 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2221 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
2222 bool FastCluster =
2223 ForceFastCluster ||
2224 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
2225
2226 for (const auto &MemOp : MemOps) {
2227 unsigned ChainPredID = DAG->SUnits.size();
2228 if (FastCluster) {
2229 for (const SDep &Pred : MemOp.SU->Preds) {
2230 // We only want to cluster the mem ops that have the same ctrl (non-data)
2231 // pred so that they don't have a ctrl dependency on each other. But for
2232 // store instrs, we can still cluster them if the pred is a load instr.
2233 if ((Pred.isCtrl() &&
2234 (IsLoad ||
2235 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
2236 !Pred.isArtificial()) {
2237 ChainPredID = Pred.getSUnit()->NodeNum;
2238 break;
2239 }
2240 }
2241 } else
2242 ChainPredID = 0;
2243
2244 Groups[ChainPredID].push_back(MemOp);
2245 }
2246 return FastCluster;
2247}
2248
2249/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2250void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2251 // Collect all the clusterable loads/stores
2252 SmallVector<MemOpInfo, 32> MemOpRecords;
2253 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2254
2255 if (MemOpRecords.size() < 2)
2256 return;
2257
2258 // Put the loads/stores without dependency into the same group with some
2259 // heuristic if the DAG is too complex, to avoid compile-time blowup.
2260 // Note that some fusion pairs could be lost with this.
2261 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2262 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2263
2264 for (auto &Group : Groups) {
2265 // Sort the loads/stores so that we can stop clustering as early as
2266 // possible.
2267 llvm::sort(Group.second);
2268
2269 // Try to cluster all the neighboring loads/stores.
2270 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2271 }
2272}
2273
2274//===----------------------------------------------------------------------===//
2275// CopyConstrain - DAG post-processing to encourage copy elimination.
2276//===----------------------------------------------------------------------===//
2277
2278namespace {
2279
2280/// Post-process the DAG to create weak edges from all uses of a copy to
2281/// the one use that defines the copy's source vreg, most likely an induction
2282/// variable increment.
2283class CopyConstrain : public ScheduleDAGMutation {
2284 // Transient state.
2285 SlotIndex RegionBeginIdx;
2286
2287 // RegionEndIdx is the slot index of the last non-debug instruction in the
2288 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2289 SlotIndex RegionEndIdx;
2290
2291public:
2292 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2293
2294 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2295
2296protected:
2297 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2298};
2299
2300} // end anonymous namespace
2301
2302std::unique_ptr<ScheduleDAGMutation>
2304 const TargetRegisterInfo *TRI) {
2305 return std::make_unique<CopyConstrain>(TII, TRI);
2306}
2307
2308/// constrainLocalCopy handles two possibilities:
2309/// 1) Local src:
2310/// I0: = dst
2311/// I1: src = ...
2312/// I2: = dst
2313/// I3: dst = src (copy)
2314/// (create pred->succ edges I0->I1, I2->I1)
2315///
2316/// 2) Local copy:
2317/// I0: dst = src (copy)
2318/// I1: = dst
2319/// I2: src = ...
2320/// I3: = dst
2321/// (create pred->succ edges I1->I2, I3->I2)
2322///
2323/// Although the MachineScheduler is currently constrained to single blocks,
2324/// this algorithm should handle extended blocks. An EBB is a set of
2325/// contiguously numbered blocks such that the previous block in the EBB is
2326/// always the single predecessor.
2327void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2328 LiveIntervals *LIS = DAG->getLIS();
2329 MachineInstr *Copy = CopySU->getInstr();
2330
2331 // Check for pure vreg copies.
2332 const MachineOperand &SrcOp = Copy->getOperand(1);
2333 Register SrcReg = SrcOp.getReg();
2334 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2335 return;
2336
2337 const MachineOperand &DstOp = Copy->getOperand(0);
2338 Register DstReg = DstOp.getReg();
2339 if (!DstReg.isVirtual() || DstOp.isDead())
2340 return;
2341
2342 // Check if either the dest or source is local. If it's live across a back
2343 // edge, it's not local. Note that if both vregs are live across the back
2344 // edge, we cannot successfully constrain the copy without cyclic scheduling.
2345 // If both the copy's source and dest are local live intervals, then we
2346 // should treat the dest as the global for the purpose of adding
2347 // constraints. This adds edges from source's other uses to the copy.
2348 unsigned LocalReg = SrcReg;
2349 unsigned GlobalReg = DstReg;
2350 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2351 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2352 LocalReg = DstReg;
2353 GlobalReg = SrcReg;
2354 LocalLI = &LIS->getInterval(LocalReg);
2355 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2356 return;
2357 }
2358 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2359
2360 // Find the global segment after the start of the local LI.
2361 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2362 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2363 // local live range. We could create edges from other global uses to the local
2364 // start, but the coalescer should have already eliminated these cases, so
2365 // don't bother dealing with them.
2366 if (GlobalSegment == GlobalLI->end())
2367 return;
2368
2369 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2370 // returned the next global segment. But if GlobalSegment overlaps with
2371 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2372 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2373 if (GlobalSegment->contains(LocalLI->beginIndex()))
2374 ++GlobalSegment;
2375
2376 if (GlobalSegment == GlobalLI->end())
2377 return;
2378
2379 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2380 if (GlobalSegment != GlobalLI->begin()) {
2381 // Two address defs have no hole.
2382 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2383 GlobalSegment->start)) {
2384 return;
2385 }
2386 // If the prior global segment may be defined by the same two-address
2387 // instruction that also defines LocalLI, then can't make a hole here.
2388 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2389 LocalLI->beginIndex())) {
2390 return;
2391 }
2392 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2393 // it would be a disconnected component in the live range.
2394 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2395 "Disconnected LRG within the scheduling region.");
2396 }
2397 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2398 if (!GlobalDef)
2399 return;
2400
2401 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2402 if (!GlobalSU)
2403 return;
2404
2405 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2406 // constraining the uses of the last local def to precede GlobalDef.
2407 SmallVector<SUnit*,8> LocalUses;
2408 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2409 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2410 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2411 for (const SDep &Succ : LastLocalSU->Succs) {
2412 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2413 continue;
2414 if (Succ.getSUnit() == GlobalSU)
2415 continue;
2416 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2417 return;
2418 LocalUses.push_back(Succ.getSUnit());
2419 }
2420 // Open the top of the GlobalLI hole by constraining any earlier global uses
2421 // to precede the start of LocalLI.
2422 SmallVector<SUnit*,8> GlobalUses;
2423 MachineInstr *FirstLocalDef =
2424 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2425 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2426 for (const SDep &Pred : GlobalSU->Preds) {
2427 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2428 continue;
2429 if (Pred.getSUnit() == FirstLocalSU)
2430 continue;
2431 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2432 return;
2433 GlobalUses.push_back(Pred.getSUnit());
2434 }
2435 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2436 // Add the weak edges.
2437 for (SUnit *LU : LocalUses) {
2438 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2439 << GlobalSU->NodeNum << ")\n");
2440 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2441 }
2442 for (SUnit *GU : GlobalUses) {
2443 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2444 << FirstLocalSU->NodeNum << ")\n");
2445 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2446 }
2447}
2448
2449/// Callback from DAG postProcessing to create weak edges to encourage
2450/// copy elimination.
2451void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2452 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2453 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2454
2455 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2456 if (FirstPos == DAG->end())
2457 return;
2458 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2459 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2460 *priorNonDebug(DAG->end(), DAG->begin()));
2461
2462 for (SUnit &SU : DAG->SUnits) {
2463 if (!SU.getInstr()->isCopy())
2464 continue;
2465
2466 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2467 }
2468}
2469
2470//===----------------------------------------------------------------------===//
2471// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2472// and possibly other custom schedulers.
2473//===----------------------------------------------------------------------===//
2474
2475static const unsigned InvalidCycle = ~0U;
2476
2477SchedBoundary::~SchedBoundary() { delete HazardRec; }
2478
2479/// Given a Count of resource usage and a Latency value, return true if a
2480/// SchedBoundary becomes resource limited.
2481/// If we are checking after scheduling a node, we should return true when
2482/// we just reach the resource limit.
2483static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2484 unsigned Latency, bool AfterSchedNode) {
2485 int ResCntFactor = (int)(Count - (Latency * LFactor));
2486 if (AfterSchedNode)
2487 return ResCntFactor >= (int)LFactor;
2488 else
2489 return ResCntFactor > (int)LFactor;
2490}
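// Illustrative example (the numbers below are hypothetical, not from the
// source): with LFactor = 2, Count = 10, and Latency = 4,
//   ResCntFactor = 10 - (4 * 2) = 2.
// After scheduling a node (AfterSchedNode == true), 2 >= 2 holds and the
// zone is reported as resource limited; before scheduling, the strict
// comparison 2 > 2 fails, so it is not yet.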
2491
2492void SchedBoundary::reset() {
2493 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2494 // Destroying and reconstructing it is very expensive though. So keep
2495 // invalid, placeholder HazardRecs.
2496 if (HazardRec && HazardRec->isEnabled()) {
2497 delete HazardRec;
2498 HazardRec = nullptr;
2499 }
2500 Available.clear();
2501 Pending.clear();
2502 CheckPending = false;
2503 CurrCycle = 0;
2504 CurrMOps = 0;
2505 MinReadyCycle = std::numeric_limits<unsigned>::max();
2506 ExpectedLatency = 0;
2507 DependentLatency = 0;
2508 RetiredMOps = 0;
2509 MaxExecutedResCount = 0;
2510 ZoneCritResIdx = 0;
2511 IsResourceLimited = false;
2512 ReservedCycles.clear();
2513 ReservedResourceSegments.clear();
2514 ReservedCyclesIndex.clear();
2515 ResourceGroupSubUnitMasks.clear();
2516#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2517 // Track the maximum number of stall cycles that could arise either from the
2518 // latency of a DAG edge or the number of cycles that a processor resource is
2519 // reserved (SchedBoundary::ReservedCycles).
2520 MaxObservedStall = 0;
2521#endif
2522 // Reserve a zero-count for invalid CritResIdx.
2523 ExecutedResCounts.resize(1);
2524 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2525}
2526
2527void SchedRemainder::
2528init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2529 reset();
2530 if (!SchedModel->hasInstrSchedModel())
2531 return;
2532 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2533 for (SUnit &SU : DAG->SUnits) {
2534 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2535 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2536 * SchedModel->getMicroOpFactor();
2537 for (TargetSchedModel::ProcResIter
2538 PI = SchedModel->getWriteProcResBegin(SC),
2539 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2540 unsigned PIdx = PI->ProcResourceIdx;
2541 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2542 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2543 RemainingCounts[PIdx] +=
2544 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2545 }
2546 }
2547}
2548
2549void SchedBoundary::
2550init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2551 reset();
2552 DAG = dag;
2553 SchedModel = smodel;
2554 Rem = rem;
2555 if (SchedModel->hasInstrSchedModel()) {
2556 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2557 ReservedCyclesIndex.resize(ResourceCount);
2558 ExecutedResCounts.resize(ResourceCount);
2559 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2560 unsigned NumUnits = 0;
2561
2562 for (unsigned i = 0; i < ResourceCount; ++i) {
2563 ReservedCyclesIndex[i] = NumUnits;
2564 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2565 if (isReservedGroup(i)) {
2566 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2567 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2568 U != UE; ++U)
2569 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2570 }
2571 }
2572
2573 ReservedCycles.resize(NumUnits, InvalidCycle);
2574 }
2575}
2576
2577/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2578/// these "soft stalls" differently than the hard stall cycles based on CPU
2579/// resources and computed by checkHazard(). A fully in-order model
2580/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2581/// available for scheduling until they are ready. However, a weaker in-order
2582/// model may use this for heuristics. For example, if a processor has in-order
2583/// behavior when reading certain resources, this may come into play.
2584unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2585 if (!SU->isUnbuffered)
2586 return 0;
2587
2588 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2589 if (ReadyCycle > CurrCycle)
2590 return ReadyCycle - CurrCycle;
2591 return 0;
2592}
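// Illustrative example (hypothetical values): in the top zone, an unbuffered
// SU with TopReadyCycle = 7 queried at CurrCycle = 5 reports 7 - 5 = 2 soft
// stall cycles; any buffered SU always reports 0.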
2593
2594/// Compute the next cycle at which the given processor resource unit
2595/// can be scheduled.
2596unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2597 unsigned ReleaseAtCycle,
2598 unsigned AcquireAtCycle) {
2599 if (SchedModel && SchedModel->enableIntervals()) {
2600 if (isTop())
2601 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2602 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2603
2604 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2605 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2606 }
2607
2608 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2609 // If this resource has never been used, always return cycle zero.
2610 if (NextUnreserved == InvalidCycle)
2611 return CurrCycle;
2612 // For bottom-up scheduling add the cycles needed for the current operation.
2613 if (!isTop())
2614 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2615 return NextUnreserved;
2616}
2617
2618/// Compute the next cycle at which the given processor resource can be
2619/// scheduled. Returns the next cycle and the index of the processor resource
2620/// instance in the reserved cycles vector.
2621std::pair<unsigned, unsigned>
2622SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2623 unsigned ReleaseAtCycle,
2624 unsigned AcquireAtCycle) {
2625 if (MischedDetailResourceBooking) {
2626 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2627 LLVM_DEBUG(dumpReservedCycles());
2628 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2629 }
2630 unsigned MinNextUnreserved = InvalidCycle;
2631 unsigned InstanceIdx = 0;
2632 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2633 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2634 assert(NumberOfInstances > 0 &&
2635 "Cannot have zero instances of a ProcResource");
2636
2637 if (isReservedGroup(PIdx)) {
2638 // If any subunits are used by the instruction, report that the
2639 // subunits of the resource group are available at the first cycle
2640 // in which the unit is available, effectively removing the group
2641 // record from hazarding and basing the hazarding decisions on the
2642 // subunit records. Otherwise, choose the first available instance
2643 // from among the subunits. Specifications which assign cycles to
2644 // both the subunits and the group or which use an unbuffered
2645 // group with buffered subunits will appear to schedule
2646 // strangely. In the first case, the additional cycles for the
2647 // group will be ignored. In the second, the group will be
2648 // ignored entirely.
2649 for (const MCWriteProcResEntry &PE :
2650 make_range(SchedModel->getWriteProcResBegin(SC),
2651 SchedModel->getWriteProcResEnd(SC)))
2652 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2653 return std::make_pair(getNextResourceCycleByInstance(
2654 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2655 StartIndex);
2656
2657 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2658 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2659 unsigned NextUnreserved, NextInstanceIdx;
2660 std::tie(NextUnreserved, NextInstanceIdx) =
2661 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2662 if (MinNextUnreserved > NextUnreserved) {
2663 InstanceIdx = NextInstanceIdx;
2664 MinNextUnreserved = NextUnreserved;
2665 }
2666 }
2667 return std::make_pair(MinNextUnreserved, InstanceIdx);
2668 }
2669
2670 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2671 ++I) {
2672 unsigned NextUnreserved =
2673 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2674 if (MischedDetailResourceBooking)
2675 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2676 << NextUnreserved << "c\n");
2677 if (MinNextUnreserved > NextUnreserved) {
2678 InstanceIdx = I;
2679 MinNextUnreserved = NextUnreserved;
2680 }
2681 }
2682 if (MischedDetailResourceBooking)
2683 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2684 << "[" << InstanceIdx - StartIndex << "]"
2685 << " available @" << MinNextUnreserved << "c"
2686 << "\n");
2687 return std::make_pair(MinNextUnreserved, InstanceIdx);
2688}
2689
2690/// Does this SU have a hazard within the current instruction group.
2691///
2692/// The scheduler supports two modes of hazard recognition. The first is the
2693/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2694/// supports highly complicated in-order reservation tables
2695/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2696///
2697/// The second is a streamlined mechanism that checks for hazards based on
2698/// simple counters that the scheduler itself maintains. It explicitly checks
2699/// for instruction dispatch limitations, including the number of micro-ops that
2700/// can dispatch per cycle.
2701///
2702/// TODO: Also check whether the SU must start a new group.
2703bool SchedBoundary::checkHazard(SUnit *SU) {
2704 if (HazardRec->isEnabled()
2705 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2706 LLVM_DEBUG(dbgs().indent(2)
2707 << "hazard: SU(" << SU->NodeNum << ") reported by HazardRec\n");
2708 return true;
2709 }
2710
2711 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2712 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2713 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") uops="
2714 << uops << ", CurrMOps = " << CurrMOps << ", "
2715 << "CurrMOps + uops > issue width of "
2716 << SchedModel->getIssueWidth() << "\n");
2717 return true;
2718 }
2719
2720 if (CurrMOps > 0 &&
2721 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2722 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2723 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") must "
2724 << (isTop() ? "begin" : "end") << " group\n");
2725 return true;
2726 }
2727
2728 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2729 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2730 for (const MCWriteProcResEntry &PE :
2731 make_range(SchedModel->getWriteProcResBegin(SC),
2732 SchedModel->getWriteProcResEnd(SC))) {
2733 unsigned ResIdx = PE.ProcResourceIdx;
2734 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2735 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2736 unsigned NRCycle, InstanceIdx;
2737 std::tie(NRCycle, InstanceIdx) =
2738 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2739 if (NRCycle > CurrCycle) {
2740#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2741 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2742#endif
2743 LLVM_DEBUG(dbgs().indent(2)
2744 << "hazard: SU(" << SU->NodeNum << ") "
2745 << SchedModel->getResourceName(ResIdx) << '['
2746 << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' << "="
2747 << NRCycle << "c, is later than "
2748 << "CurrCycle = " << CurrCycle << "c\n");
2749 return true;
2750 }
2751 }
2752 }
2753 return false;
2754}
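// Illustrative example (hypothetical values) of the micro-op check above:
// with an issue width of 4, CurrMOps = 3, and an SU needing uops = 2,
// 3 + 2 > 4 triggers a hazard, so the node must wait for the next cycle.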
2755
2756// Find the unscheduled node in ReadySUs with the highest latency.
2757unsigned SchedBoundary::
2758findMaxLatency(ArrayRef<SUnit *> ReadySUs) {
2759 SUnit *LateSU = nullptr;
2760 unsigned RemLatency = 0;
2761 for (SUnit *SU : ReadySUs) {
2762 unsigned L = getUnscheduledLatency(SU);
2763 if (L > RemLatency) {
2764 RemLatency = L;
2765 LateSU = SU;
2766 }
2767 }
2768 if (LateSU) {
2769 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2770 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2771 }
2772 return RemLatency;
2773}
2774
2775// Count resources in this zone and the remaining unscheduled
2776// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2777// resource index, or zero if the zone is issue limited.
2778unsigned SchedBoundary::
2779getOtherResourceCount(unsigned &OtherCritIdx) {
2780 OtherCritIdx = 0;
2781 if (!SchedModel->hasInstrSchedModel())
2782 return 0;
2783
2784 unsigned OtherCritCount = Rem->RemIssueCount
2785 + (RetiredMOps * SchedModel->getMicroOpFactor());
2786 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2787 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2788 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2789 PIdx != PEnd; ++PIdx) {
2790 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2791 if (OtherCount > OtherCritCount) {
2792 OtherCritCount = OtherCount;
2793 OtherCritIdx = PIdx;
2794 }
2795 }
2796 if (OtherCritIdx) {
2797 LLVM_DEBUG(
2798 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2799 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2800 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2801 }
2802 return OtherCritCount;
2803}
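// Illustrative example (hypothetical values): if the scaled issue count
// (RemIssueCount plus retired micro-ops) is 40 units and some resource PIdx
// has getResourceCount(PIdx) + RemainingCounts[PIdx] = 48 units, then
// OtherCritCount = 48 and OtherCritIdx = PIdx: the code outside this zone is
// limited by that resource rather than by issue bandwidth.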
2804
2805void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2806 unsigned Idx) {
2807 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2808
2809#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2810 // ReadyCycle was bumped up to the CurrCycle when this node was
2811 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2812 // scheduling, so may now be greater than ReadyCycle.
2813 if (ReadyCycle > CurrCycle)
2814 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2815#endif
2816
2817 if (ReadyCycle < MinReadyCycle)
2818 MinReadyCycle = ReadyCycle;
2819
2820 // Check for interlocks first. For the purpose of other heuristics, an
2821 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2822 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2823 bool HazardDetected = !IsBuffered && ReadyCycle > CurrCycle;
2824 if (HazardDetected)
2825 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum
2826 << ") ReadyCycle = " << ReadyCycle
2827 << " is later than CurrCycle = " << CurrCycle
2828 << " on an unbuffered resource" << "\n");
2829 else
2830 HazardDetected = checkHazard(SU);
2831
2832 if (!HazardDetected && Available.size() >= ReadyListLimit) {
2833 HazardDetected = true;
2834 LLVM_DEBUG(dbgs().indent(2) << "hazard: Available Q is full (size: "
2835 << Available.size() << ")\n");
2836 }
2837
2838 if (!HazardDetected) {
2839 Available.push(SU);
2840 LLVM_DEBUG(dbgs().indent(2)
2841 << "Move SU(" << SU->NodeNum << ") into Available Q\n");
2842
2843 if (InPQueue)
2844 Pending.remove(Pending.begin() + Idx);
2845 return;
2846 }
2847
2848 if (!InPQueue)
2849 Pending.push(SU);
2850}
2851
2852/// Move the boundary of scheduled code by one cycle.
2853void SchedBoundary::bumpCycle(unsigned NextCycle) {
2854 if (SchedModel->getMicroOpBufferSize() == 0) {
2855 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2856 "MinReadyCycle uninitialized");
2857 if (MinReadyCycle > NextCycle)
2858 NextCycle = MinReadyCycle;
2859 }
2860 // Update the current micro-ops, which will issue in the next cycle.
2861 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2862 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2863
2864 // Decrement DependentLatency based on the next cycle.
2865 if ((NextCycle - CurrCycle) > DependentLatency)
2866 DependentLatency = 0;
2867 else
2868 DependentLatency -= (NextCycle - CurrCycle);
2869
2870 if (!HazardRec->isEnabled()) {
2871 // Bypass HazardRec virtual calls.
2872 CurrCycle = NextCycle;
2873 } else {
2874 // Bypass getHazardType calls in case of long latency.
2875 for (; CurrCycle != NextCycle; ++CurrCycle) {
2876 if (isTop())
2877 HazardRec->AdvanceCycle();
2878 else
2879 HazardRec->RecedeCycle();
2880 }
2881 }
2882 CheckPending = true;
2883 IsResourceLimited =
2884 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2885 getScheduledLatency(), true);
2886
2887 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2888 << '\n');
2889}
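// Illustrative example (hypothetical values): with an issue width of 4,
// advancing from CurrCycle = 6 to NextCycle = 8 gives DecMOps = 4 * 2 = 8,
// so a CurrMOps of 5 resets to 0: at most eight micro-ops could have issued
// during the two skipped cycles.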
2890
2891void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2892 ExecutedResCounts[PIdx] += Count;
2893 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2894 MaxExecutedResCount = ExecutedResCounts[PIdx];
2895}
2896
2897/// Add the given processor resource to this scheduled zone.
2898///
2899/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2900/// cycles during which this resource is released.
2901///
2902/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2903/// cycles at which the resource is acquired after issue (assuming no stalls).
2904///
2905/// \return the next cycle at which the instruction may execute without
2906/// oversubscribing resources.
2907unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2908 unsigned ReleaseAtCycle,
2909 unsigned NextCycle,
2910 unsigned AcquireAtCycle) {
2911 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2912 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2913 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2914 << ReleaseAtCycle << "x" << Factor << "u\n");
2915
2916 // Update executed resource counts.
2917 incExecutedResources(PIdx, Count);
2918 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2919 Rem->RemainingCounts[PIdx] -= Count;
2920
2921 // Check if this resource exceeds the current critical resource. If so, it
2922 // becomes the critical resource.
2923 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2924 ZoneCritResIdx = PIdx;
2925 LLVM_DEBUG(dbgs() << " *** Critical resource "
2926 << SchedModel->getResourceName(PIdx) << ": "
2927 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2928 << "c\n");
2929 }
2930 // For reserved resources, record the highest cycle using the resource.
2931 unsigned NextAvailable, InstanceIdx;
2932 std::tie(NextAvailable, InstanceIdx) =
2933 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2934 if (NextAvailable > CurrCycle) {
2935 LLVM_DEBUG(dbgs() << " Resource conflict: "
2936 << SchedModel->getResourceName(PIdx)
2937 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2938 << " reserved until @" << NextAvailable << "\n");
2939 }
2940 return NextAvailable;
2941}
2942
2943/// Move the boundary of scheduled code by one SUnit.
2944void SchedBoundary::bumpNode(SUnit *SU) {
2945 // checkHazard should prevent scheduling multiple instructions per cycle that
2946 // exceed the issue width.
2947 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2948 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2949 assert(
2950 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2951 "Cannot schedule this instruction's MicroOps in the current cycle.");
2952
2953 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2954 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2955
2956 unsigned NextCycle = CurrCycle;
2957 switch (SchedModel->getMicroOpBufferSize()) {
2958 case 0:
2959 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2960 break;
2961 case 1:
2962 if (ReadyCycle > NextCycle) {
2963 NextCycle = ReadyCycle;
2964 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2965 }
2966 break;
2967 default:
2968 // We don't currently model the OOO reorder buffer, so consider all
2969 // scheduled MOps to be "retired". We do loosely model in-order resource
2970 // latency. If this instruction uses an in-order resource, account for any
2971 // likely stall cycles.
2972 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2973 NextCycle = ReadyCycle;
2974 break;
2975 }
2976 RetiredMOps += IncMOps;
2977
2978 // Update resource counts and critical resource.
2979 if (SchedModel->hasInstrSchedModel()) {
2980 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2981 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2982 Rem->RemIssueCount -= DecRemIssue;
2983 if (ZoneCritResIdx) {
2984 // Scale scheduled micro-ops for comparing with the critical resource.
2985 unsigned ScaledMOps =
2986 RetiredMOps * SchedModel->getMicroOpFactor();
2987
2988 // If scaled micro-ops are now more than the previous critical resource by
2989 // a full cycle, then micro-ops issue becomes critical.
2990 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2991 >= (int)SchedModel->getLatencyFactor()) {
2992 ZoneCritResIdx = 0;
2993 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2994 << ScaledMOps / SchedModel->getLatencyFactor()
2995 << "c\n");
2996 }
2997 }
2998 for (TargetSchedModel::ProcResIter
2999 PI = SchedModel->getWriteProcResBegin(SC),
3000 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3001 unsigned RCycle =
3002 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
3003 PI->AcquireAtCycle);
3004 if (RCycle > NextCycle)
3005 NextCycle = RCycle;
3006 }
3007 if (SU->hasReservedResource) {
3008 // For reserved resources, record the highest cycle using the resource.
3009 // For top-down scheduling, this is the cycle in which we schedule this
3010 // instruction plus the number of cycles the operations reserves the
3011 // resource. For bottom-up, it is simply the instruction's cycle.
3012 for (TargetSchedModel::ProcResIter
3013 PI = SchedModel->getWriteProcResBegin(SC),
3014 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3015 unsigned PIdx = PI->ProcResourceIdx;
3016 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
3017
3018 if (SchedModel && SchedModel->enableIntervals()) {
3019 unsigned ReservedUntil, InstanceIdx;
3020 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3021 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3022 if (isTop()) {
3023 ReservedResourceSegments[InstanceIdx].add(
3024 ResourceSegments::getResourceIntervalTop(
3025 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3026 MIResourceCutOff);
3027 } else {
3028 ReservedResourceSegments[InstanceIdx].add(
3029 ResourceSegments::getResourceIntervalBottom(
3030 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3031 MIResourceCutOff);
3032 }
3033 } else {
3034
3035 unsigned ReservedUntil, InstanceIdx;
3036 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3037 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3038 if (isTop()) {
3039 ReservedCycles[InstanceIdx] =
3040 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
3041 } else
3042 ReservedCycles[InstanceIdx] = NextCycle;
3043 }
3044 }
3045 }
3046 }
3047 }
3048 // Update ExpectedLatency and DependentLatency.
3049 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
3050 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
3051 if (SU->getDepth() > TopLatency) {
3052 TopLatency = SU->getDepth();
3053 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
3054 << SU->NodeNum << ") " << TopLatency << "c\n");
3055 }
3056 if (SU->getHeight() > BotLatency) {
3057 BotLatency = SU->getHeight();
3058 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
3059 << SU->NodeNum << ") " << BotLatency << "c\n");
3060 }
3061 // If we stall for any reason, bump the cycle.
3062 if (NextCycle > CurrCycle)
3063 bumpCycle(NextCycle);
3064 else
3065 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
3066 // resource limited. If a stall occurred, bumpCycle does this.
3067 IsResourceLimited =
3068 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
3069 getScheduledLatency(), true);
3070
3071 // Update the reservation table.
3072 if (HazardRec->isEnabled()) {
3073 if (!isTop() && SU->isCall) {
3074 // Calls are scheduled with their preceding instructions. For bottom-up
3075 // scheduling, clear the pipeline state before emitting.
3076 HazardRec->Reset();
3077 }
3078 HazardRec->EmitInstruction(SU);
3079 // Scheduling an instruction may have made pending instructions available.
3080 CheckPending = true;
3081 }
3082
3083 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
3084 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
3085 // one cycle. Since we commonly reach the max MOps here, opportunistically
3086 // bump the cycle to avoid uselessly checking everything in the readyQ.
3087 CurrMOps += IncMOps;
3088
3089 // Bump the cycle count for issue group constraints.
3090 // This must be done after NextCycle has been adjusted for all other stalls.
3091 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
3092 // currCycle to X.
3093 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
3094 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
3095 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
3096 << " group\n");
3097 bumpCycle(++NextCycle);
3098 }
3099
3100 while (CurrMOps >= SchedModel->getIssueWidth()) {
3101 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
3102 << CurrCycle << '\n');
3103 bumpCycle(++NextCycle);
3104 }
3105 LLVM_DEBUG(dumpScheduledState());
3106}
3107
3108/// Release pending ready nodes in to the available queue. This makes them
3109/// visible to heuristics.
3110void SchedBoundary::releasePending() {
3111 // If the available queue is empty, it is safe to reset MinReadyCycle.
3112 if (Available.empty())
3113 MinReadyCycle = std::numeric_limits<unsigned>::max();
3114
3115 // Check to see if any of the pending instructions are ready to issue. If
3116 // so, add them to the available queue.
3117 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
3118 SUnit *SU = *(Pending.begin() + I);
3119 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
3120
3121 LLVM_DEBUG(dbgs() << "Checking pending node SU(" << SU->NodeNum << ")\n");
3122
3123 if (ReadyCycle < MinReadyCycle)
3124 MinReadyCycle = ReadyCycle;
3125
3126 if (Available.size() >= ReadyListLimit)
3127 break;
3128
3129 releaseNode(SU, ReadyCycle, true, I);
3130 if (E != Pending.size()) {
3131 --I;
3132 --E;
3133 }
3134 }
3135 CheckPending = false;
3136}
3137
3138/// Remove SU from the ready set for this boundary.
3139void SchedBoundary::removeReady(SUnit *SU) {
3140 if (Available.isInQueue(SU))
3141 Available.remove(Available.find(SU));
3142 else {
3143 assert(Pending.isInQueue(SU) && "bad ready count");
3144 Pending.remove(Pending.find(SU));
3145 }
3146}
3147
3148/// If this queue only has one ready candidate, return it. As a side effect,
3149/// defer any nodes that now hit a hazard, and advance the cycle until at least
3150/// one node is ready. If multiple instructions are ready, return NULL.
3151SUnit *SchedBoundary::pickOnlyChoice() {
3152 if (CheckPending)
3153 releasePending();
3154
3155 // Defer any ready instrs that now have a hazard.
3156 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
3157 if (checkHazard(*I)) {
3158 Pending.push(*I);
3159 I = Available.remove(I);
3160 continue;
3161 }
3162 ++I;
3163 }
3164 for (unsigned i = 0; Available.empty(); ++i) {
3165// FIXME: Re-enable assert once PR20057 is resolved.
3166// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
3167// "permanent hazard");
3168 (void)i;
3169 bumpCycle(CurrCycle + 1);
3170 releasePending();
3171 }
3172
3173 LLVM_DEBUG(Pending.dump());
3174 LLVM_DEBUG(Available.dump());
3175
3176 if (Available.size() == 1)
3177 return *Available.begin();
3178 return nullptr;
3179}
3180
3181#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3182
3183/// Dump the content of the \ref ReservedCycles vector for the
3184/// resources that are used in the basic block.
3185///
3186LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
3187 if (!SchedModel->hasInstrSchedModel())
3188 return;
3189
3190 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
3191 unsigned StartIdx = 0;
3192
3193 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3194 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
3195 std::string ResName = SchedModel->getResourceName(ResIdx);
3196 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3197 dbgs() << ResName << "(" << UnitIdx << ") = ";
3198 if (SchedModel && SchedModel->enableIntervals()) {
3199 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3200 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3201 else
3202 dbgs() << "{ }\n";
3203 } else
3204 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
3205 }
3206 StartIdx += NumUnits;
3207 }
3208}
3209
3210// This is useful information to dump after bumpNode.
3211// Note that the Queue contents are more useful before pickNodeFromQueue.
3212LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
3213 unsigned ResFactor;
3214 unsigned ResCount;
3215 if (ZoneCritResIdx) {
3216 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
3217 ResCount = getResourceCount(ZoneCritResIdx);
3218 } else {
3219 ResFactor = SchedModel->getMicroOpFactor();
3220 ResCount = RetiredMOps * ResFactor;
3221 }
3222 unsigned LFactor = SchedModel->getLatencyFactor();
3223 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
3224 << " Retired: " << RetiredMOps;
3225 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
3226 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
3227 << ResCount / ResFactor << " "
3228 << SchedModel->getResourceName(ZoneCritResIdx)
3229 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
3230 << (IsResourceLimited ? " - Resource" : " - Latency")
3231 << " limited.\n";
3232 if (MischedDetailResourceBooking)
3233 dumpReservedCycles();
3234}
3235#endif
3236
3237//===----------------------------------------------------------------------===//
3238// GenericScheduler - Generic implementation of MachineSchedStrategy.
3239//===----------------------------------------------------------------------===//
3240
3241void GenericSchedulerBase::SchedCandidate::
3242initResourceDelta(const ScheduleDAGMI *DAG,
3243 const TargetSchedModel *SchedModel) {
3244 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
3245 return;
3246
3247 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
3248 for (TargetSchedModel::ProcResIter
3249 PI = SchedModel->getWriteProcResBegin(SC),
3250 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3251 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
3252 ResDelta.CritResources += PI->ReleaseAtCycle;
3253 if (PI->ProcResourceIdx == Policy.DemandResIdx)
3254 ResDelta.DemandedResources += PI->ReleaseAtCycle;
3255 }
3256}
3257
3258/// Compute remaining latency. We need this both to determine whether the
3259/// overall schedule has become latency-limited and whether the instructions
3260/// outside this zone are resource or latency limited.
3261///
3262/// The "dependent" latency is updated incrementally during scheduling as the
3263/// max height/depth of scheduled nodes minus the cycles since it was
3264/// scheduled:
3265/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
3266///
3267/// The "independent" latency is the max ready queue depth:
3268/// ILat = max N.depth for N in Available|Pending
3269///
3270/// RemainingLatency is the greater of independent and dependent latency.
3271///
3272/// These computations are expensive, especially in DAGs with many edges, so
3273/// only do them if necessary.
3274static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3275 unsigned RemLatency = CurrZone.getDependentLatency();
3276 RemLatency = std::max(RemLatency,
3277 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3278 RemLatency = std::max(RemLatency,
3279 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3280 return RemLatency;
3281}
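// Illustrative example (hypothetical values): if the zone's dependent
// latency is 6, the deepest node in Available has unscheduled latency 9, and
// the deepest node in Pending has 4, then RemLatency = max(6, 9, 4) = 9.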
3282
3283/// Returns true if the current cycle plus remaining latency is greater than
3284/// the critical path in the scheduling region.
3285bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3286 SchedBoundary &CurrZone,
3287 bool ComputeRemLatency,
3288 unsigned &RemLatency) const {
3289 // The current cycle is already greater than the critical path, so we are
3290 // already latency limited and don't need to compute the remaining latency.
3291 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3292 return true;
3293
3294 // If we haven't scheduled anything yet, then we aren't latency limited.
3295 if (CurrZone.getCurrCycle() == 0)
3296 return false;
3297
3298 if (ComputeRemLatency)
3299 RemLatency = computeRemLatency(CurrZone);
3300
3301 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3302}
3303
3304/// Set the CandPolicy for a scheduling zone, given the current resources and
3305/// latencies inside and outside the zone.
3306void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3307 SchedBoundary &CurrZone,
3308 SchedBoundary *OtherZone) {
3309 // Apply preemptive heuristics based on the total latency and resources
3310 // inside and outside this zone. Potential stalls should be considered before
3311 // following this policy.
3312
3313 // Compute the critical resource outside the zone.
3314 unsigned OtherCritIdx = 0;
3315 unsigned OtherCount =
3316 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3317
3318 bool OtherResLimited = false;
3319 unsigned RemLatency = 0;
3320 bool RemLatencyComputed = false;
3321 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3322 RemLatency = computeRemLatency(CurrZone);
3323 RemLatencyComputed = true;
3324 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3325 OtherCount, RemLatency, false);
3326 }
3327
3328 // Schedule aggressively for latency in PostRA mode. We don't check for
3329 // acyclic latency during PostRA, and highly out-of-order processors will
3330 // skip PostRA scheduling.
3331 if (!OtherResLimited &&
3332 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3333 RemLatency))) {
3334 Policy.ReduceLatency |= true;
3335 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3336 << " RemainingLatency " << RemLatency << " + "
3337 << CurrZone.getCurrCycle() << "c > CritPath "
3338 << Rem.CriticalPath << "\n");
3339 }
3340 // If the same resource is limiting inside and outside the zone, do nothing.
3341 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3342 return;
3343
3344 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3345 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3346 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3347 } if (OtherResLimited) dbgs()
3348 << " RemainingLimit: "
3349 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3350 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3351 << " Latency limited both directions.\n");
3352
3353 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3354 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3355
3356 if (OtherResLimited)
3357 Policy.DemandResIdx = OtherCritIdx;
3358}
3359
3360#ifndef NDEBUG
3361const char *GenericSchedulerBase::getReasonStr(
3362 GenericSchedulerBase::CandReason Reason) {
3363 // clang-format off
3364 switch (Reason) {
3365 case NoCand: return "NOCAND ";
3366 case Only1: return "ONLY1 ";
3367 case PhysReg: return "PHYS-REG ";
3368 case RegExcess: return "REG-EXCESS";
3369 case RegCritical: return "REG-CRIT ";
3370 case Stall: return "STALL ";
3371 case Cluster: return "CLUSTER ";
3372 case Weak: return "WEAK ";
3373 case RegMax: return "REG-MAX ";
3374 case ResourceReduce: return "RES-REDUCE";
3375 case ResourceDemand: return "RES-DEMAND";
3376 case TopDepthReduce: return "TOP-DEPTH ";
3377 case TopPathReduce: return "TOP-PATH ";
3378 case BotHeightReduce:return "BOT-HEIGHT";
3379 case BotPathReduce: return "BOT-PATH ";
3380 case NodeOrder: return "ORDER ";
3381 case FirstValid: return "FIRST ";
3382 };
3383 // clang-format on
3384 llvm_unreachable("Unknown reason!");
3385}
3386
3387void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3388 PressureChange P;
3389 unsigned ResIdx = 0;
3390 unsigned Latency = 0;
3391 switch (Cand.Reason) {
3392 default:
3393 break;
3394 case RegExcess:
3395 P = Cand.RPDelta.Excess;
3396 break;
3397 case RegCritical:
3398 P = Cand.RPDelta.CriticalMax;
3399 break;
3400 case RegMax:
3401 P = Cand.RPDelta.CurrentMax;
3402 break;
3403 case ResourceReduce:
3404 ResIdx = Cand.Policy.ReduceResIdx;
3405 break;
3406 case ResourceDemand:
3407 ResIdx = Cand.Policy.DemandResIdx;
3408 break;
3409 case TopDepthReduce:
3410 Latency = Cand.SU->getDepth();
3411 break;
3412 case TopPathReduce:
3413 Latency = Cand.SU->getHeight();
3414 break;
3415 case BotHeightReduce:
3416 Latency = Cand.SU->getHeight();
3417 break;
3418 case BotPathReduce:
3419 Latency = Cand.SU->getDepth();
3420 break;
3421 }
3422 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3423 if (P.isValid())
3424 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3425 << ":" << P.getUnitInc() << " ";
3426 else
3427 dbgs() << " ";
3428 if (ResIdx)
3429 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3430 else
3431 dbgs() << " ";
3432 if (Latency)
3433 dbgs() << " " << Latency << " cycles ";
3434 else
3435 dbgs() << " ";
3436 dbgs() << '\n';
3437}
3438#endif
3439
3440/// Return true if this heuristic determines order.
3441/// TODO: Consider refactor return type of these functions as integer or enum,
3442/// as we may need to differentiate whether TryCand is better than Cand.
3443bool llvm::tryLess(int TryVal, int CandVal,
3444 GenericSchedulerBase::SchedCandidate &TryCand,
3445 GenericSchedulerBase::SchedCandidate &Cand,
3446 GenericSchedulerBase::CandReason Reason) {
3447 if (TryVal < CandVal) {
3448 TryCand.Reason = Reason;
3449 return true;
3450 }
3451 if (TryVal > CandVal) {
3452 if (Cand.Reason > Reason)
3453 Cand.Reason = Reason;
3454 return true;
3455 }
3456 return false;
3457}
3458
3459bool llvm::tryGreater(int TryVal, int CandVal,
3460 GenericSchedulerBase::SchedCandidate &TryCand,
3461 GenericSchedulerBase::SchedCandidate &Cand,
3462 GenericSchedulerBase::CandReason Reason) {
3463 if (TryVal > CandVal) {
3464 TryCand.Reason = Reason;
3465 return true;
3466 }
3467 if (TryVal < CandVal) {
3468 if (Cand.Reason > Reason)
3469 Cand.Reason = Reason;
3470 return true;
3471 }
3472 return false;
3473}
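// Usage sketch (hypothetical values): the tryLess/tryGreater protocol is
// decisive on a win *or* a loss and only falls through on a tie, so callers
// test the returned flag and then check which side won:
//
//   if (tryLess(TryStall, CandStall, TryCand, Cand, Stall))
//     return TryCand.Reason != NoCand; // true iff TryCand won this round
//
// On a loss TryCand.Reason is left untouched (typically still NoCand), while
// Cand.Reason is replaced by this heuristic's Reason when it outranks the one
// recorded so far; that is what lets tracePick and the statistics attribute
// each pick to the highest-priority heuristic that decided it.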
3474
3475bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3476 GenericSchedulerBase::SchedCandidate &Cand,
3477 SchedBoundary &Zone) {
3478 if (Zone.isTop()) {
3479 // Prefer the candidate with the lesser depth, but only if one of them has
3480 // depth greater than the total latency scheduled so far, otherwise either
3481 // of them could be scheduled now with no stall.
3482 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3483 Zone.getScheduledLatency()) {
3484 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3485 TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3486 return true;
3487 }
3488 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3489 TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3490 return true;
3491 } else {
3492 // Prefer the candidate with the lesser height, but only if one of them has
3493 // height greater than the total latency scheduled so far, otherwise either
3494 // of them could be scheduled now with no stall.
3495 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3496 Zone.getScheduledLatency()) {
3497 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3498 TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3499 return true;
3500 }
3501 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3502 TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3503 return true;
3504 }
3505 return false;
3506}
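// Worked example (hypothetical, top zone): with getScheduledLatency() = 10,
// comparing TryCand.SU at depth 12 against Cand.SU at depth 8 exceeds the
// scheduled latency, so tryLess keeps the shallower unit (depth 8) under
// TopDepthReduce. If both depths were <= 10, neither could stall the zone and
// the decision falls through to tryGreater on heights (TopPathReduce),
// favoring the unit with the longer remaining path to the exit.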
3507
3508static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
3509 bool IsPostRA = false) {
3510 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3511 << GenericSchedulerBase::getReasonStr(Reason) << " ["
3512 << (IsPostRA ? "post-RA" : "pre-RA") << "]\n");
3513
3514 if (IsPostRA) {
3515 if (IsTop)
3516 NumTopPostRA++;
3517 else
3518 NumBotPostRA++;
3519
3520 switch (Reason) {
3521 case GenericSchedulerBase::NoCand:
3522 NumNoCandPostRA++;
3523 return;
3524 case GenericSchedulerBase::Only1:
3525 NumOnly1PostRA++;
3526 return;
3527 case GenericSchedulerBase::PhysReg:
3528 NumPhysRegPostRA++;
3529 return;
3530 case GenericSchedulerBase::RegExcess:
3531 NumRegExcessPostRA++;
3532 return;
3533 case GenericSchedulerBase::RegCritical:
3534 NumRegCriticalPostRA++;
3535 return;
3536 case GenericSchedulerBase::Stall:
3537 NumStallPostRA++;
3538 return;
3539 case GenericSchedulerBase::Cluster:
3540 NumClusterPostRA++;
3541 return;
3542 case GenericSchedulerBase::Weak:
3543 NumWeakPostRA++;
3544 return;
3545 case GenericSchedulerBase::RegMax:
3546 NumRegMaxPostRA++;
3547 return;
3548 case GenericSchedulerBase::ResourceReduce:
3549 NumResourceReducePostRA++;
3550 return;
3551 case GenericSchedulerBase::ResourceDemand:
3552 NumResourceDemandPostRA++;
3553 return;
3554 case GenericSchedulerBase::TopDepthReduce:
3555 NumTopDepthReducePostRA++;
3556 return;
3557 case GenericSchedulerBase::TopPathReduce:
3558 NumTopPathReducePostRA++;
3559 return;
3560 case GenericSchedulerBase::BotHeightReduce:
3561 NumBotHeightReducePostRA++;
3562 return;
3563 case GenericSchedulerBase::BotPathReduce:
3564 NumBotPathReducePostRA++;
3565 return;
3566 case GenericSchedulerBase::NodeOrder:
3567 NumNodeOrderPostRA++;
3568 return;
3569 case GenericSchedulerBase::FirstValid:
3570 NumFirstValidPostRA++;
3571 return;
3572 };
3573 } else {
3574 if (IsTop)
3575 NumTopPreRA++;
3576 else
3577 NumBotPreRA++;
3578
3579 switch (Reason) {
3580 case GenericSchedulerBase::NoCand:
3581 NumNoCandPreRA++;
3582 return;
3583 case GenericSchedulerBase::Only1:
3584 NumOnly1PreRA++;
3585 return;
3586 case GenericSchedulerBase::PhysReg:
3587 NumPhysRegPreRA++;
3588 return;
3589 case GenericSchedulerBase::RegExcess:
3590 NumRegExcessPreRA++;
3591 return;
3592 case GenericSchedulerBase::RegCritical:
3593 NumRegCriticalPreRA++;
3594 return;
3595 case GenericSchedulerBase::Stall:
3596 NumStallPreRA++;
3597 return;
3598 case GenericSchedulerBase::Cluster:
3599 NumClusterPreRA++;
3600 return;
3601 case GenericSchedulerBase::Weak:
3602 NumWeakPreRA++;
3603 return;
3604 case GenericSchedulerBase::RegMax:
3605 NumRegMaxPreRA++;
3606 return;
3607 case GenericSchedulerBase::ResourceReduce:
3608 NumResourceReducePreRA++;
3609 return;
3610 case GenericSchedulerBase::ResourceDemand:
3611 NumResourceDemandPreRA++;
3612 return;
3613 case GenericSchedulerBase::TopDepthReduce:
3614 NumTopDepthReducePreRA++;
3615 return;
3616 case GenericSchedulerBase::TopPathReduce:
3617 NumTopPathReducePreRA++;
3618 return;
3619 case GenericSchedulerBase::BotHeightReduce:
3620 NumBotHeightReducePreRA++;
3621 return;
3622 case GenericSchedulerBase::BotPathReduce:
3623 NumBotPathReducePreRA++;
3624 return;
3625 case GenericSchedulerBase::NodeOrder:
3626 NumNodeOrderPreRA++;
3627 return;
3628 case GenericSchedulerBase::FirstValid:
3629 NumFirstValidPreRA++;
3630 return;
3631 };
3632 }
3633 llvm_unreachable("Unknown reason!");
3634}
3635
3636static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
3637 bool IsPostRA = false) {
3638 tracePick(Cand.Reason, Cand.AtTop, IsPostRA);
3639}
3640
3641void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3642 assert(dag->hasVRegLiveness() &&
3643 "(PreRA)GenericScheduler needs vreg liveness");
3644 DAG = static_cast<ScheduleDAGMILive*>(dag);
3645 SchedModel = DAG->getSchedModel();
3646 TRI = DAG->TRI;
3647
3648 if (RegionPolicy.ComputeDFSResult)
3649 DAG->computeDFSResult();
3650
3651 Rem.init(DAG, SchedModel);
3652 Top.init(DAG, SchedModel, &Rem);
3653 Bot.init(DAG, SchedModel, &Rem);
3654
3655 // Initialize resource counts.
3656
3657 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3658 // are disabled, then these HazardRecs will be disabled.
3659 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3660 if (!Top.HazardRec) {
3661 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3662 }
3663 if (!Bot.HazardRec) {
3664 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3665 }
3666 TopCand.SU = nullptr;
3667 BotCand.SU = nullptr;
3668
3669 TopClusterID = InvalidClusterId;
3670 BotClusterID = InvalidClusterId;
3671}
3672
3673/// Initialize the per-region scheduling policy.
3674void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3675 MachineBasicBlock::iterator End,
3676 unsigned NumRegionInstrs) {
3677 const MachineFunction &MF = *Begin->getMF();
3678 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3679
3680 // Avoid setting up the register pressure tracker for small regions to save
3681 // compile time. As a rough heuristic, only track pressure when the number of
3682 // schedulable instructions exceeds half the allocatable integer register file
3683 // for the largest legal integer register type.
3684 RegionPolicy.ShouldTrackPressure = true;
3685 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3686 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3687 if (TLI->isTypeLegal(LegalIntVT)) {
3688 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3689 TLI->getRegClassFor(LegalIntVT));
3690 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3691 break;
3692 }
3693 }
3694
3695 // For generic targets, we default to bottom-up, because it's simpler and more
3696 // compile-time optimizations have been implemented in that direction.
3697 RegionPolicy.OnlyBottomUp = true;
3698
3699 // Allow the subtarget to override default policy.
3700 SchedRegion Region(Begin, End, NumRegionInstrs);
3701 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Region);
3702
3703 // After subtarget overrides, apply command line options.
3704 if (!EnableRegPressure) {
3705 RegionPolicy.ShouldTrackPressure = false;
3706 RegionPolicy.ShouldTrackLaneMasks = false;
3707 }
3708
3709 if (PreRADirection == MISched::TopDown) {
3710 RegionPolicy.OnlyTopDown = true;
3711 RegionPolicy.OnlyBottomUp = false;
3712 } else if (PreRADirection == MISched::BottomUp) {
3713 RegionPolicy.OnlyTopDown = false;
3714 RegionPolicy.OnlyBottomUp = true;
3715 } else if (PreRADirection == MISched::Bidirectional) {
3716 RegionPolicy.OnlyBottomUp = false;
3717 RegionPolicy.OnlyTopDown = false;
3718 }
3719
3720 BotIdx = NumRegionInstrs - 1;
3721 this->NumRegionInstrs = NumRegionInstrs;
3722}
3723
3724void GenericScheduler::dumpPolicy() const {
3725 // Cannot completely remove virtual function even in release mode.
3726#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3727 dbgs() << "GenericScheduler RegionPolicy: "
3728 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3729 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3730 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3731 << "\n";
3732#endif
3733}
3734
3735/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3736/// critical path by more cycles than it takes to drain the instruction buffer.
3737/// We estimate an upper bound on in-flight instructions as:
3738///
3739/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3740/// InFlightIterations = AcyclicPath / CyclesPerIteration
3741/// InFlightResources = InFlightIterations * LoopResources
3742///
3743/// TODO: Check execution resources in addition to IssueCount.
3744void GenericScheduler::checkAcyclicLatency() {
3745 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3746 return;
3747
3748 // Scaled number of cycles per loop iteration.
3749 unsigned IterCount =
3750 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3751 Rem.RemIssueCount);
3752 // Scaled acyclic critical path.
3753 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3754 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3755 unsigned InFlightCount =
3756 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3757 unsigned BufferLimit =
3758 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3759
3760 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3761
3762 LLVM_DEBUG(
3763 dbgs() << "IssueCycles="
3764 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3765 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3766 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3767 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3768 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3769 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3770}
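// Worked example (hypothetical, all model factors = 1): CyclicCritPath = 4,
// CriticalPath = 20, RemIssueCount = 6 micro-ops, MicroOpBufferSize = 8.
// Then IterCount = max(4, 6) = 6, AcyclicCount = 20, and InFlightCount =
// (20 * 6 + 5) / 6 = 20 micro-ops, which exceeds BufferLimit = 8, so
// Rem.IsAcyclicLatencyLimited is set.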
3771
3772void GenericScheduler::registerRoots() {
3773 Rem.CriticalPath = DAG->ExitSU.getDepth();
3774
3775 // Some roots may not feed into ExitSU. Check all of them in case.
3776 for (const SUnit *SU : Bot.Available) {
3777 if (SU->getDepth() > Rem.CriticalPath)
3778 Rem.CriticalPath = SU->getDepth();
3779 }
3780 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3781 if (DumpCriticalPathLength) {
3782 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3783 }
3784
3785 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3786 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3787 checkAcyclicLatency();
3788 }
3789}
3790
3791bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP,
3792 GenericSchedulerBase::SchedCandidate &TryCand,
3793 GenericSchedulerBase::SchedCandidate &Cand,
3794 GenericSchedulerBase::CandReason Reason,
3795 const TargetRegisterInfo *TRI,
3796 const MachineFunction &MF) {
3797 // If one candidate decreases and the other increases, go with it.
3798 // Invalid candidates have UnitInc==0.
3799 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3800 Reason)) {
3801 return true;
3802 }
3803 // Do not compare the magnitude of pressure changes between top and bottom
3804 // boundary.
3805 if (Cand.AtTop != TryCand.AtTop)
3806 return false;
3807
3808 // If both candidates affect the same set in the same boundary, go with the
3809 // smallest increase.
3810 unsigned TryPSet = TryP.getPSetOrMax();
3811 unsigned CandPSet = CandP.getPSetOrMax();
3812 if (TryPSet == CandPSet) {
3813 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3814 Reason);
3815 }
3816
3817 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3818 std::numeric_limits<int>::max();
3819
3820 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3821 std::numeric_limits<int>::max();
3822
3823 // If the candidates are decreasing pressure, reverse priority.
3824 if (TryP.getUnitInc() < 0)
3825 std::swap(TryRank, CandRank);
3826 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3827}
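// Worked example (hypothetical): if TryP decreases a pressure set
// (UnitInc = -2) while CandP increases one (UnitInc = +1), the sign
// comparison via tryGreater is immediately decisive for TryCand. If both
// increase the same set in the same boundary (+1 vs +3), tryLess keeps the
// smaller increase. Only when the candidates touch different sets does the
// target-provided getRegPressureSetScore break the tie.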
3828
3829unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) {
3830 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3831}
3832
3833/// Minimize physical register live ranges. Regalloc wants them adjacent to
3834/// their physreg def/use.
3835///
3836/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3837/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3838/// with the operation that produces or consumes the physreg. We'll do this when
3839/// regalloc has support for parallel copies.
3840int llvm::biasPhysReg(const SUnit *SU, bool isTop) {
3841 const MachineInstr *MI = SU->getInstr();
3842
3843 if (MI->isCopy()) {
3844 unsigned ScheduledOper = isTop ? 1 : 0;
3845 unsigned UnscheduledOper = isTop ? 0 : 1;
3846 // If we have already scheduled the physreg producer/consumer, immediately
3847 // schedule the copy.
3848 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3849 return 1;
3850 // If the physreg is at the boundary, defer it. Otherwise schedule it
3851 // immediately to free the dependent. We can hoist the copy later.
3852 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3853 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3854 return AtBoundary ? -1 : 1;
3855 }
3856
3857 if (MI->isMoveImmediate()) {
3858 // If we have a move immediate and all successors have been assigned, bias
3859 // towards scheduling this later. Make sure all register defs are to
3860 // physical registers.
3861 bool DoBias = true;
3862 for (const MachineOperand &Op : MI->defs()) {
3863 if (Op.isReg() && !Op.getReg().isPhysical()) {
3864 DoBias = false;
3865 break;
3866 }
3867 }
3868
3869 if (DoBias)
3870 return isTop ? -1 : 1;
3871 }
3872
3873 return 0;
3874}
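// Example (hypothetical, bottom-up): for %1:gr64 = COPY $rdi, operand 0 is
// the vreg def and operand 1 the physreg use. With isTop = false the
// "scheduled" operand is the def, which is virtual, so the physreg use
// decides: while the copy still has unscheduled predecessors it is not at
// the boundary and the function returns 1 (schedule it now, keeping the
// physreg live range short); once it is at the boundary it returns -1 so
// the copy is deferred next to the region edge.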
3875
3876void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3877 bool AtTop,
3878 const RegPressureTracker &RPTracker,
3879 RegPressureTracker &TempTracker) {
3880 Cand.SU = SU;
3881 Cand.AtTop = AtTop;
3882 if (DAG->isTrackingPressure()) {
3883 if (AtTop) {
3884 TempTracker.getMaxDownwardPressureDelta(
3885 Cand.SU->getInstr(),
3886 Cand.RPDelta,
3887 DAG->getRegionCriticalPSets(),
3888 DAG->getRegPressure().MaxSetPressure);
3889 } else {
3890 if (VerifyScheduling) {
3891 TempTracker.getMaxUpwardPressureDelta(
3892 Cand.SU->getInstr(),
3893 &DAG->getPressureDiff(Cand.SU),
3894 Cand.RPDelta,
3895 DAG->getRegionCriticalPSets(),
3896 DAG->getRegPressure().MaxSetPressure);
3897 } else {
3898 RPTracker.getUpwardPressureDelta(
3899 Cand.SU->getInstr(),
3900 DAG->getPressureDiff(Cand.SU),
3901 Cand.RPDelta,
3902 DAG->getRegionCriticalPSets(),
3903 DAG->getRegPressure().MaxSetPressure);
3904 }
3905 }
3906 }
3907 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3908 << " Try SU(" << Cand.SU->NodeNum << ") "
3909 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3910 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3911}
3912
3913/// Apply a set of heuristics to a new candidate. Heuristics are currently
3914/// hierarchical. This may be more efficient than a graduated cost model because
3915/// we don't need to evaluate all aspects of the model for each node in the
3916/// queue. But it's really done to make the heuristics easier to debug and
3917/// statistically analyze.
3918///
3919/// \param Cand provides the policy and current best candidate.
3920/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3921/// \param Zone describes the scheduled zone that we are extending, or nullptr
3922/// if Cand is from a different zone than TryCand.
3923/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3924bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3925 SchedCandidate &TryCand,
3926 SchedBoundary *Zone) const {
3927 // Initialize the candidate if needed.
3928 if (!Cand.isValid()) {
3929 TryCand.Reason = FirstValid;
3930 return true;
3931 }
3932
3933 // Bias physreg defs and copies toward their uses and definitions, respectively.
3934 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3935 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3936 return TryCand.Reason != NoCand;
3937
3938 // Avoid exceeding the target's limit.
3939 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3940 Cand.RPDelta.Excess,
3941 TryCand, Cand, RegExcess, TRI,
3942 DAG->MF))
3943 return TryCand.Reason != NoCand;
3944
3945 // Avoid increasing the max critical pressure in the scheduled region.
3946 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3947 Cand.RPDelta.CriticalMax,
3948 TryCand, Cand, RegCritical, TRI,
3949 DAG->MF))
3950 return TryCand.Reason != NoCand;
3951
3952 // We only compare a subset of features when comparing nodes between
3953 // Top and Bottom boundary. Some properties are simply incomparable, in many
3954 // other instances we should only override the other boundary if something
3955 // is a clear good pick on one boundary. Skip heuristics that are more
3956 // "tie-breaking" in nature.
3957 bool SameBoundary = Zone != nullptr;
3958 if (SameBoundary) {
3959 // For loops that are acyclic path limited, aggressively schedule for
3960 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3961 // heuristics to take precedence.
3962 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3963 tryLatency(TryCand, Cand, *Zone))
3964 return TryCand.Reason != NoCand;
3965
3966 // Prioritize instructions that read unbuffered resources by stall cycles.
3967 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3968 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3969 return TryCand.Reason != NoCand;
3970 }
3971
3972 // Keep clustered nodes together to encourage downstream peephole
3973 // optimizations which may reduce resource requirements.
3974 //
3975 // This is a best effort to set things up for a post-RA pass. Optimizations
3976 // like generating loads of multiple registers should ideally be done within
3977 // the scheduler pass by combining the loads during DAG postprocessing.
3978 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
3979 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
3980 bool CandIsClusterSucc =
3981 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
3982 bool TryCandIsClusterSucc =
3983 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
3984
3985 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
3986 Cluster))
3987 return TryCand.Reason != NoCand;
3988
3989 if (SameBoundary) {
3990 // Weak edges are for clustering and other constraints.
3991 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3992 getWeakLeft(Cand.SU, Cand.AtTop),
3993 TryCand, Cand, Weak))
3994 return TryCand.Reason != NoCand;
3995 }
3996
3997 // Avoid increasing the max pressure of the entire region.
3998 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3999 Cand.RPDelta.CurrentMax,
4000 TryCand, Cand, RegMax, TRI,
4001 DAG->MF))
4002 return TryCand.Reason != NoCand;
4003
4004 if (SameBoundary) {
4005 // Avoid critical resource consumption and balance the schedule.
4006 if (tryLess(TryCand.ResDelta.CritResources,
4007 Cand.ResDelta.CritResources,
4008 TryCand, Cand, ResourceReduce))
4009 return TryCand.Reason != NoCand;
4010 if (tryGreater(TryCand.ResDelta.DemandedResources,
4011 Cand.ResDelta.DemandedResources,
4012 TryCand, Cand, ResourceDemand))
4013 return TryCand.Reason != NoCand;
4014
4015 // Avoid serializing long latency dependence chains.
4016 // For acyclic path limited loops, latency was already checked above.
4017 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
4018 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
4019 return TryCand.Reason != NoCand;
4020
4021 // Fall through to original instruction order.
4022 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
4023 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
4024 TryCand.Reason = NodeOrder;
4025 return true;
4026 }
4027 }
4028
4029 return false;
4030}
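// Illustrative debug output (hypothetical SU numbers and pressure set): when
// the RegCritical test above is decisive, traceCandidate/tracePick print
// lines roughly of this shape under -debug-only=machine-scheduler:
//
//   Cand SU(5) REG-CRIT GPR32:1
//   Pick Bot REG-CRIT [pre-RA]
//
// i.e. SU(5) became the candidate because it increases the critical pressure
// set the least, and was finally picked from the bottom boundary.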
4031
4032/// Pick the best candidate from the queue.
4033///
4034/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
4035/// DAG building. To adjust for the current scheduling location we need to
4036/// maintain the number of vreg uses remaining to be top-scheduled.
4038 const CandPolicy &ZonePolicy,
4039 const RegPressureTracker &RPTracker,
4040 SchedCandidate &Cand) {
4041 // getMaxPressureDelta temporarily modifies the tracker.
4042 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
4043
4044 ReadyQueue &Q = Zone.Available;
4045 for (SUnit *SU : Q) {
4046
4047 SchedCandidate TryCand(ZonePolicy);
4048 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
4049 // Pass SchedBoundary only when comparing nodes from the same boundary.
4050 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
4051 if (tryCandidate(Cand, TryCand, ZoneArg)) {
4052 // Initialize resource delta if needed in case future heuristics query it.
4053 if (TryCand.ResDelta == SchedResourceDelta())
4054 TryCand.initResourceDelta(DAG, SchedModel);
4055 Cand.setBest(TryCand);
4056 LLVM_DEBUG(traceCandidate(Cand));
4057 }
4058 }
4059}
4060
4061/// Pick the best candidate node from either the top or bottom queue.
4063 // Schedule as far as possible in the direction of no choice. This is most
4064 // efficient, but also provides the best heuristics for CriticalPSets.
4065 if (SUnit *SU = Bot.pickOnlyChoice()) {
4066 IsTopNode = false;
4067 tracePick(Only1, /*IsTopNode=*/false);
4068 return SU;
4069 }
4070 if (SUnit *SU = Top.pickOnlyChoice()) {
4071 IsTopNode = true;
4072 tracePick(Only1, /*IsTopNode=*/true);
4073 return SU;
4074 }
4075 // Set the bottom-up policy based on the state of the current bottom zone and
4076 // the instructions outside the zone, including the top zone.
4077 CandPolicy BotPolicy;
4078 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
4079 // Set the top-down policy based on the state of the current top zone and
4080 // the instructions outside the zone, including the bottom zone.
4081 CandPolicy TopPolicy;
4082 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
4083
4084 // See if BotCand is still valid (because we previously scheduled from Top).
4085 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4086 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4087 BotCand.Policy != BotPolicy) {
4088 BotCand.reset(CandPolicy());
4089 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
4090 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4091 } else {
4092 LLVM_DEBUG(traceCandidate(BotCand));
4093#ifndef NDEBUG
4094 if (VerifyScheduling) {
4095 SchedCandidate TCand;
4096 TCand.reset(CandPolicy());
4097 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
4098 assert(TCand.SU == BotCand.SU &&
4099 "Last pick result should correspond to re-picking right now");
4100 }
4101#endif
4102 }
4103
4104 // Check if the top Q has a better candidate.
4105 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4106 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4107 TopCand.Policy != TopPolicy) {
4108 TopCand.reset(CandPolicy());
4109 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
4110 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4111 } else {
4112 LLVM_DEBUG(traceCandidate(TopCand));
4113#ifndef NDEBUG
4114 if (VerifyScheduling) {
4115 SchedCandidate TCand;
4116 TCand.reset(CandPolicy());
4117 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
4118 assert(TCand.SU == TopCand.SU &&
4119 "Last pick result should correspond to re-picking right now");
4120 }
4121#endif
4122 }
4123
4124 // Pick best from BotCand and TopCand.
4125 assert(BotCand.isValid());
4126 assert(TopCand.isValid());
4127 SchedCandidate Cand = BotCand;
4128 TopCand.Reason = NoCand;
4129 if (tryCandidate(Cand, TopCand, nullptr)) {
4130 Cand.setBest(TopCand);
4131 LLVM_DEBUG(traceCandidate(Cand));
4132 }
4133
4134 IsTopNode = Cand.AtTop;
4135 tracePick(Cand);
4136 return Cand.SU;
4137}
4138
4139/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
4140SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
4141 if (DAG->top() == DAG->bottom()) {
4142 assert(Top.Available.empty() && Top.Pending.empty() &&
4143 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4144 return nullptr;
4145 }
4146 SUnit *SU;
4147 if (RegionPolicy.OnlyTopDown) {
4148 SU = Top.pickOnlyChoice();
4149 if (!SU) {
4150 CandPolicy NoPolicy;
4151 TopCand.reset(NoPolicy);
4152 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
4153 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4154 tracePick(TopCand);
4155 SU = TopCand.SU;
4156 }
4157 IsTopNode = true;
4158 } else if (RegionPolicy.OnlyBottomUp) {
4159 SU = Bot.pickOnlyChoice();
4160 if (!SU) {
4161 CandPolicy NoPolicy;
4162 BotCand.reset(NoPolicy);
4163 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
4164 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4165 tracePick(BotCand);
4166 SU = BotCand.SU;
4167 }
4168 IsTopNode = false;
4169 } else {
4170 SU = pickNodeBidirectional(IsTopNode);
4171 }
4172 assert(!SU->isScheduled && "SUnit scheduled twice.");
4173
4174 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
4175 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
4176 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
4177 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
4178 //
4179 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
4180 // isBottomReady. That is, it didn't factor into the decision to choose SU
4181 // because it isTopReady or isBottomReady, respectively. In fact, if the
4182 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
4183 // queues respectivley contain the original roots and don't get updated when
4184 // queues respectively contain the original roots and don't get updated when
4185 // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
4186 // because we scheduled everything but the top roots. Conversely, if SU
4187 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
4188 // but the bottom roots. If it's in a queue even coincidentally, it should be
4189 if (SU->isTopReady())
4190 Top.removeReady(SU);
4191 if (SU->isBottomReady())
4192 Bot.removeReady(SU);
4193
4194 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4195 << *SU->getInstr());
4196
4197 if (IsTopNode) {
4198 if (SU->NodeNum == TopIdx++)
4199 ++NumInstrsInSourceOrderPreRA;
4200 } else {
4201 assert(BotIdx < NumRegionInstrs && "out of bounds");
4202 if (SU->NodeNum == BotIdx--)
4203 ++NumInstrsInSourceOrderPreRA;
4204 }
4205
4206 NumInstrsScheduledPreRA += 1;
4207
4208 return SU;
4209}
4210
4211void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
4212 MachineBasicBlock::iterator InsertPos = SU->getInstr();
4213 if (!isTop)
4214 ++InsertPos;
4215 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
4216
4217 // Find already scheduled copies with a single physreg dependence and move
4218 // them just above the scheduled instruction.
4219 for (SDep &Dep : Deps) {
4220 if (Dep.getKind() != SDep::Data || !Dep.getReg().isPhysical())
4221 continue;
4222 SUnit *DepSU = Dep.getSUnit();
4223 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
4224 continue;
4225 MachineInstr *Copy = DepSU->getInstr();
4226 if (!Copy->isCopy() && !Copy->isMoveImmediate())
4227 continue;
4228 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
4229 DAG->dumpNode(*Dep.getSUnit()));
4230 DAG->moveInstruction(Copy, InsertPos);
4231 }
4232}
4233
4234/// Update the scheduler's state after scheduling a node. This is the same node
4235/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
4236/// update its state based on the current cycle before MachineSchedStrategy
4237/// does.
4238///
4239/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
4240/// them here. See comments in biasPhysReg.
4241void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4242 if (IsTopNode) {
4243 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4244 TopClusterID = SU->ParentClusterIdx;
4245 LLVM_DEBUG({
4246 if (TopClusterID != InvalidClusterId) {
4247 ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
4248 dbgs() << " Top Cluster: ";
4249 for (auto *N : *TopCluster)
4250 dbgs() << N->NodeNum << '\t';
4251 dbgs() << '\n';
4252 }
4253 });
4254 Top.bumpNode(SU);
4255 if (SU->hasPhysRegUses)
4256 reschedulePhysReg(SU, true);
4257 } else {
4258 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4259 BotClusterID = SU->ParentClusterIdx;
4260 LLVM_DEBUG({
4261 if (BotClusterID != InvalidClusterId) {
4262 ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
4263 dbgs() << " Bot Cluster: ";
4264 for (auto *N : *BotCluster)
4265 dbgs() << N->NodeNum << '\t';
4266 dbgs() << '\n';
4267 }
4268 });
4269 Bot.bumpNode(SU);
4270 if (SU->hasPhysRegDefs)
4271 reschedulePhysReg(SU, false);
4272 }
4273}
4274
4275static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
4276 return createGenericSchedLive(C);
4277}
4278
4279static MachineSchedRegistry
4280GenericSchedRegistry("converge", "Standard converging scheduler.",
4281 createConvergingSched);
4282
4283//===----------------------------------------------------------------------===//
4284// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
4285//===----------------------------------------------------------------------===//
4286
4287void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
4288 DAG = Dag;
4289 SchedModel = DAG->getSchedModel();
4290 TRI = DAG->TRI;
4291
4292 Rem.init(DAG, SchedModel);
4293 Top.init(DAG, SchedModel, &Rem);
4294 Bot.init(DAG, SchedModel, &Rem);
4295
4296 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
4297 // or are disabled, then these HazardRecs will be disabled.
4298 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
4299 if (!Top.HazardRec) {
4300 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4301 }
4302 if (!Bot.HazardRec) {
4303 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4304 }
4305 TopClusterID = InvalidClusterId;
4306 BotClusterID = InvalidClusterId;
4307}
4308
4309void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
4310 MachineBasicBlock::iterator End,
4311 unsigned NumRegionInstrs) {
4312 const MachineFunction &MF = *Begin->getMF();
4313
4314 // Default to top-down because it was implemented first and existing targets
4315 // expect that behavior by default.
4316 RegionPolicy.OnlyTopDown = true;
4317 RegionPolicy.OnlyBottomUp = false;
4318
4319 // Allow the subtarget to override default policy.
4320 SchedRegion Region(Begin, End, NumRegionInstrs);
4321 MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, Region);
4322
4323 // After subtarget overrides, apply command line options.
4324 if (PostRADirection == MISched::TopDown) {
4325 RegionPolicy.OnlyTopDown = true;
4326 RegionPolicy.OnlyBottomUp = false;
4327 } else if (PostRADirection == MISched::BottomUp) {
4328 RegionPolicy.OnlyTopDown = false;
4329 RegionPolicy.OnlyBottomUp = true;
4330 } else if (PostRADirection == MISched::Bidirectional) {
4331 RegionPolicy.OnlyBottomUp = false;
4332 RegionPolicy.OnlyTopDown = false;
4333 }
4334
4335 BotIdx = NumRegionInstrs - 1;
4336 this->NumRegionInstrs = NumRegionInstrs;
4337}
4338
4339void PostGenericScheduler::registerRoots() {
4340 Rem.CriticalPath = DAG->ExitSU.getDepth();
4341
4342 // Some roots may not feed into ExitSU. Check all of them in case.
4343 for (const SUnit *SU : Bot.Available) {
4344 if (SU->getDepth() > Rem.CriticalPath)
4345 Rem.CriticalPath = SU->getDepth();
4346 }
4347 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
4348 if (DumpCriticalPathLength) {
4349 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
4350 }
4351}
4352
4353/// Apply a set of heuristics to a new candidate for PostRA scheduling.
4354///
4355/// \param Cand provides the policy and current best candidate.
4356/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
4357/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
4358bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
4359 SchedCandidate &TryCand) {
4360 // Initialize the candidate if needed.
4361 if (!Cand.isValid()) {
4362 TryCand.Reason = FirstValid;
4363 return true;
4364 }
4365
4366 // Prioritize instructions that read unbuffered resources by stall cycles.
4367 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
4368 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4369 return TryCand.Reason != NoCand;
4370
4371 // Keep clustered nodes together.
4372 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
4373 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
4374 bool CandIsClusterSucc =
4375 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
4376 bool TryCandIsClusterSucc =
4377 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
4378
4379 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
4380 Cluster))
4381 return TryCand.Reason != NoCand;
4382 // Avoid critical resource consumption and balance the schedule.
4383 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
4384 TryCand, Cand, ResourceReduce))
4385 return TryCand.Reason != NoCand;
4386 if (tryGreater(TryCand.ResDelta.DemandedResources,
4387 Cand.ResDelta.DemandedResources,
4388 TryCand, Cand, ResourceDemand))
4389 return TryCand.Reason != NoCand;
4390
4391 // We only compare a subset of features when comparing nodes between
4392 // Top and Bottom boundary.
4393 if (Cand.AtTop == TryCand.AtTop) {
4394 // Avoid serializing long latency dependence chains.
4395 if (Cand.Policy.ReduceLatency &&
4396 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4397 return TryCand.Reason != NoCand;
4398 }
4399
4400 // Fall through to original instruction order.
4401 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
4402 TryCand.Reason = NodeOrder;
4403 return true;
4404 }
4405
4406 return false;
4407}
4408
4409void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
4410 SchedCandidate &Cand) {
4411 ReadyQueue &Q = Zone.Available;
4412 for (SUnit *SU : Q) {
4413 SchedCandidate TryCand(Cand.Policy);
4414 TryCand.SU = SU;
4415 TryCand.AtTop = Zone.isTop();
4416 TryCand.initResourceDelta(DAG, SchedModel);
4417 if (tryCandidate(Cand, TryCand)) {
4418 Cand.setBest(TryCand);
4419 LLVM_DEBUG(traceCandidate(Cand));
4420 }
4421 }
4422}
4423
4424/// Pick the best candidate node from either the top or bottom queue.
4425SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4426 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4427 // out common parts.
4428
4429 // Schedule as far as possible in the direction of no choice. This is most
4430 // efficient, but also provides the best heuristics for CriticalPSets.
4431 if (SUnit *SU = Bot.pickOnlyChoice()) {
4432 IsTopNode = false;
4433 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4434 return SU;
4435 }
4436 if (SUnit *SU = Top.pickOnlyChoice()) {
4437 IsTopNode = true;
4438 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4439 return SU;
4440 }
4441 // Set the bottom-up policy based on the state of the current bottom zone and
4442 // the instructions outside the zone, including the top zone.
4443 CandPolicy BotPolicy;
4444 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4445 // Set the top-down policy based on the state of the current top zone and
4446 // the instructions outside the zone, including the bottom zone.
4447 CandPolicy TopPolicy;
4448 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4449
4450 // See if BotCand is still valid (because we previously scheduled from Top).
4451 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4452 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4453 BotCand.Policy != BotPolicy) {
4454 BotCand.reset(CandPolicy());
4455 pickNodeFromQueue(Bot, BotCand);
4456 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4457 } else {
4458 LLVM_DEBUG(traceCandidate(BotCand));
4459#ifndef NDEBUG
4460 if (VerifyScheduling) {
4461 SchedCandidate TCand;
4462 TCand.reset(CandPolicy());
4463 pickNodeFromQueue(Bot, TCand);
4464 assert(TCand.SU == BotCand.SU &&
4465 "Last pick result should correspond to re-picking right now");
4466 }
4467#endif
4468 }
4469
4470 // Check if the top Q has a better candidate.
4471 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4472 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4473 TopCand.Policy != TopPolicy) {
4474 TopCand.reset(CandPolicy());
4475 pickNodeFromQueue(Top, TopCand);
4476 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4477 } else {
4478 LLVM_DEBUG(traceCandidate(TopCand));
4479#ifndef NDEBUG
4480 if (VerifyScheduling) {
4481 SchedCandidate TCand;
4482 TCand.reset(CandPolicy());
4483 pickNodeFromQueue(Top, TCand);
4484 assert(TCand.SU == TopCand.SU &&
4485 "Last pick result should correspond to re-picking right now");
4486 }
4487#endif
4488 }
4489
4490 // Pick best from BotCand and TopCand.
4491 assert(BotCand.isValid());
4492 assert(TopCand.isValid());
4493 SchedCandidate Cand = BotCand;
4494 TopCand.Reason = NoCand;
4495 if (tryCandidate(Cand, TopCand)) {
4496 Cand.setBest(TopCand);
4497 LLVM_DEBUG(traceCandidate(Cand));
4498 }
4499
4500 IsTopNode = Cand.AtTop;
4501 tracePick(Cand, /*IsPostRA=*/true);
4502 return Cand.SU;
4503}
4504
4505/// Pick the next node to schedule.
4506SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4507 if (DAG->top() == DAG->bottom()) {
4508 assert(Top.Available.empty() && Top.Pending.empty() &&
4509 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4510 return nullptr;
4511 }
4512 SUnit *SU;
4513 if (RegionPolicy.OnlyBottomUp) {
4514 SU = Bot.pickOnlyChoice();
4515 if (SU) {
4516 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4517 } else {
4518 CandPolicy NoPolicy;
4519 BotCand.reset(NoPolicy);
4520 // Set the bottom-up policy based on the state of the current bottom
4521 // zone and the instructions outside the zone, including the top zone.
4522 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4523 pickNodeFromQueue(Bot, BotCand);
4524 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4525 tracePick(BotCand, /*IsPostRA=*/true);
4526 SU = BotCand.SU;
4527 }
4528 IsTopNode = false;
4529 } else if (RegionPolicy.OnlyTopDown) {
4530 SU = Top.pickOnlyChoice();
4531 if (SU) {
4532 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4533 } else {
4534 CandPolicy NoPolicy;
4535 TopCand.reset(NoPolicy);
4536 // Set the top-down policy based on the state of the current top zone
4537 // and the instructions outside the zone, including the bottom zone.
4538 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4539 pickNodeFromQueue(Top, TopCand);
4540 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4541 tracePick(TopCand, /*IsPostRA=*/true);
4542 SU = TopCand.SU;
4543 }
4544 IsTopNode = true;
4545 } else {
4546 SU = pickNodeBidirectional(IsTopNode);
4547 }
4548 assert(!SU->isScheduled && "SUnit scheduled twice.");
4549
4550 if (SU->isTopReady())
4551 Top.removeReady(SU);
4552 if (SU->isBottomReady())
4553 Bot.removeReady(SU);
4554
4555 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4556 << *SU->getInstr());
4557
4558 if (IsTopNode) {
4559 if (SU->NodeNum == TopIdx++)
4560 ++NumInstrsInSourceOrderPostRA;
4561 } else {
4562 assert(BotIdx < NumRegionInstrs && "out of bounds");
4563 if (SU->NodeNum == BotIdx--)
4564 ++NumInstrsInSourceOrderPostRA;
4565 }
4566
4567 NumInstrsScheduledPostRA += 1;
4568
4569 return SU;
4570}
4571
4572/// Called after ScheduleDAGMI has scheduled an instruction and updated
4573/// scheduled/remaining flags in the DAG nodes.
4574void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4575 if (IsTopNode) {
4576 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4577 TopClusterID = SU->ParentClusterIdx;
4578 Top.bumpNode(SU);
4579 } else {
4580 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4581 BotClusterID = SU->ParentClusterIdx;
4582 Bot.bumpNode(SU);
4583 }
4584}
4585
4586//===----------------------------------------------------------------------===//
4587// ILP Scheduler. Currently for experimental analysis of heuristics.
4588//===----------------------------------------------------------------------===//
4589
4590namespace {
4591
4592/// Order nodes by the ILP metric.
4593struct ILPOrder {
4594 const SchedDFSResult *DFSResult = nullptr;
4595 const BitVector *ScheduledTrees = nullptr;
4596 bool MaximizeILP;
4597
4598 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4599
4600 /// Apply a less-than relation on node priority.
4601 ///
4602 /// (Return true if A comes after B in the Q.)
4603 bool operator()(const SUnit *A, const SUnit *B) const {
4604 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4605 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4606 if (SchedTreeA != SchedTreeB) {
4607 // Unscheduled trees have lower priority.
4608 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4609 return ScheduledTrees->test(SchedTreeB);
4610
4611 // Trees with shallower connections have lower priority.
4612 if (DFSResult->getSubtreeLevel(SchedTreeA)
4613 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4614 return DFSResult->getSubtreeLevel(SchedTreeA)
4615 < DFSResult->getSubtreeLevel(SchedTreeB);
4616 }
4617 }
4618 if (MaximizeILP)
4619 return DFSResult->getILP(A) < DFSResult->getILP(B);
4620 else
4621 return DFSResult->getILP(A) > DFSResult->getILP(B);
4622 }
4623};
4624
4625/// Schedule based on the ILP metric.
4626class ILPScheduler : public MachineSchedStrategy {
4627 ScheduleDAGMILive *DAG = nullptr;
4628 ILPOrder Cmp;
4629
4630 std::vector<SUnit*> ReadyQ;
4631
4632public:
4633 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4634
4635 void initialize(ScheduleDAGMI *dag) override {
4636 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4637 DAG = static_cast<ScheduleDAGMILive*>(dag);
4638 DAG->computeDFSResult();
4639 Cmp.DFSResult = DAG->getDFSResult();
4640 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4641 ReadyQ.clear();
4642 }
4643
4644 void registerRoots() override {
4645 // Restore the heap in ReadyQ with the updated DFS results.
4646 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4647 }
4648
4649 /// Implement MachineSchedStrategy interface.
4650 /// -----------------------------------------
4651
4652 /// Callback to select the highest priority node from the ready Q.
4653 SUnit *pickNode(bool &IsTopNode) override {
4654 if (ReadyQ.empty()) return nullptr;
4655 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4656 SUnit *SU = ReadyQ.back();
4657 ReadyQ.pop_back();
4658 IsTopNode = false;
4659 LLVM_DEBUG(dbgs() << "Pick node "
4660 << "SU(" << SU->NodeNum << ") "
4661 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4662 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4663 << " @"
4664 << DAG->getDFSResult()->getSubtreeLevel(
4665 DAG->getDFSResult()->getSubtreeID(SU))
4666 << '\n'
4667 << "Scheduling " << *SU->getInstr());
4668 return SU;
4669 }
4670
4671 /// Scheduler callback to notify that a new subtree is scheduled.
4672 void scheduleTree(unsigned SubtreeID) override {
4673 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4674 }
4675
4676 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4677 /// DFSResults, and resort the priority Q.
4678 void schedNode(SUnit *SU, bool IsTopNode) override {
4679 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4680 }
4681
4682 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4683
4684 void releaseBottomNode(SUnit *SU) override {
4685 ReadyQ.push_back(SU);
4686 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4687 }
4688};
4689
4690} // end anonymous namespace
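// A minimal standalone sketch (illustrative only, not part of the pass) of
// the heap discipline ILPScheduler uses for ReadyQ: make_heap establishes
// the comparator's order, pop_heap moves the best node to the back where
// pickNode pops it, and push_heap restores the invariant after
// releaseBottomNode.
#if 0
#include <algorithm>
#include <vector>

int demoPickBest(std::vector<int> &ReadyQ) {
  std::make_heap(ReadyQ.begin(), ReadyQ.end()); // order by operator<
  std::pop_heap(ReadyQ.begin(), ReadyQ.end());  // best element now at back
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  return Best;
}
#endif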
4691
4692static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4693 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4694}
4695static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4696 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4697}
4698
4699static MachineSchedRegistry ILPMaxRegistry(
4700 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4701static MachineSchedRegistry ILPMinRegistry(
4702 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4703
4704//===----------------------------------------------------------------------===//
4705// Machine Instruction Shuffler for Correctness Testing
4706//===----------------------------------------------------------------------===//
4707
4708#ifndef NDEBUG
4709namespace {
4710
4711/// Apply a less-than relation on the node order, which corresponds to the
4712/// instruction order prior to scheduling. IsReverse implements greater-than.
4713template<bool IsReverse>
4714struct SUnitOrder {
4715 bool operator()(SUnit *A, SUnit *B) const {
4716 if (IsReverse)
4717 return A->NodeNum > B->NodeNum;
4718 else
4719 return A->NodeNum < B->NodeNum;
4720 }
4721};
4722
4723/// Reorder instructions as much as possible.
4724class InstructionShuffler : public MachineSchedStrategy {
4725 bool IsAlternating;
4726 bool IsTopDown;
4727
4728 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4729 // gives nodes with a higher number higher priority causing the latest
4730 // instructions to be scheduled first.
4731 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4732 TopQ;
4733
4734 // When scheduling bottom-up, use greater-than as the queue priority.
4735 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4736 BottomQ;
4737
4738public:
4739 InstructionShuffler(bool alternate, bool topdown)
4740 : IsAlternating(alternate), IsTopDown(topdown) {}
4741
4742 void initialize(ScheduleDAGMI*) override {
4743 TopQ.clear();
4744 BottomQ.clear();
4745 }
4746
4747 /// Implement MachineSchedStrategy interface.
4748 /// -----------------------------------------
4749
4750 SUnit *pickNode(bool &IsTopNode) override {
4751 SUnit *SU;
4752 if (IsTopDown) {
4753 do {
4754 if (TopQ.empty()) return nullptr;
4755 SU = TopQ.top();
4756 TopQ.pop();
4757 } while (SU->isScheduled);
4758 IsTopNode = true;
4759 } else {
4760 do {
4761 if (BottomQ.empty()) return nullptr;
4762 SU = BottomQ.top();
4763 BottomQ.pop();
4764 } while (SU->isScheduled);
4765 IsTopNode = false;
4766 }
4767 if (IsAlternating)
4768 IsTopDown = !IsTopDown;
4769 return SU;
4770 }
4771
4772 void schedNode(SUnit *SU, bool IsTopNode) override {}
4773
4774 void releaseTopNode(SUnit *SU) override {
4775 TopQ.push(SU);
4776 }
4777 void releaseBottomNode(SUnit *SU) override {
4778 BottomQ.push(SU);
4779 }
4780};
4781
4782} // end anonymous namespace
4783
4784static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4785 bool Alternate =
4786 PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp;
4787 bool TopDown = PreRADirection != MISched::BottomUp;
4788 return new ScheduleDAGMILive(
4789 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4790}
4791
4792static MachineSchedRegistry ShufflerRegistry(
4793 "shuffle", "Shuffle machine instructions alternating directions",
4794 createInstructionShuffler);
4795#endif // !NDEBUG
4796
4797//===----------------------------------------------------------------------===//
4798// GraphWriter support for ScheduleDAGMILive.
4799//===----------------------------------------------------------------------===//
4800
4801#ifndef NDEBUG
4802
4803template <>
4804struct GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> {
4805};
4806
4807template <>
4808struct DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits {
4809 DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4810
4811 static std::string getGraphName(const ScheduleDAG *G) {
4812 return std::string(G->MF.getName());
4813 }
4814
4815 static bool renderGraphFromBottomUp() {
4816 return true;
4817 }
4818
4819 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4820 if (ViewMISchedCutoff == 0)
4821 return false;
4822 return (Node->Preds.size() > ViewMISchedCutoff
4823 || Node->Succs.size() > ViewMISchedCutoff);
4824 }
4825
4826 /// If you want to override the dot attributes printed for a particular
4827 /// edge, override this method.
4828 static std::string getEdgeAttributes(const SUnit *Node,
4829 SUnitIterator EI,
4830 const ScheduleDAG *Graph) {
4831 if (EI.isArtificialDep())
4832 return "color=cyan,style=dashed";
4833 if (EI.isCtrlDep())
4834 return "color=blue,style=dashed";
4835 return "";
4836 }
4837
4838 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4839 std::string Str;
4840 raw_string_ostream SS(Str);
4841 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4842 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4843 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4844 SS << "SU:" << SU->NodeNum;
4845 if (DFS)
4846 SS << " I:" << DFS->getNumInstrs(SU);
4847 return Str;
4848 }
4849
4850 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4851 return G->getGraphNodeLabel(SU);
4852 }
4853
4854 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4855 std::string Str("shape=Mrecord");
4856 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4857 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4858 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4859 if (DFS) {
4860 Str += ",style=filled,fillcolor=\"#";
4861 Str += DOT::getColorString(DFS->getSubtreeID(N));
4862 Str += '"';
4863 }
4864 return Str;
4865 }
4866};
4867
4868#endif // NDEBUG
4869
4870/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4871/// rendered using 'dot'.
4872void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4873#ifndef NDEBUG
4874 ViewGraph(this, Name, false, Title);
4875#else
4876 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4877 << "systems with Graphviz or gv!\n";
4878#endif // NDEBUG
4879}
4880
4881/// Out-of-line implementation with no arguments is handy for gdb.
4882void ScheduleDAGMI::viewGraph() {
4883 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4884}
4885
4886/// Sort predicate for the intervals stored in an instance of
4887/// ResourceSegments. Intervals are always disjoint (no intersection
4888/// for any pairs of intervals), therefore we can sort the totality of
4889/// the intervals by looking only at the left boundary.
4890static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4891 const ResourceSegments::IntervalTy &B) {
4892 return A.first < B.first;
4893}
4894
4895unsigned ResourceSegments::getFirstAvailableAt(
4896 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4897 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4898 IntervalBuilder) const {
4899 assert(llvm::is_sorted(_Intervals, sortIntervals) &&
4900 "Cannot execute on an un-sorted set of intervals.");
4901
4902 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4903 // a ResourceSegment interval for that situation.
4904 if (AcquireAtCycle == ReleaseAtCycle)
4905 return CurrCycle;
4906
4907 unsigned RetCycle = CurrCycle;
4908 ResourceSegments::IntervalTy NewInterval =
4909 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4910 for (auto &Interval : _Intervals) {
4911 if (!intersects(NewInterval, Interval))
4912 continue;
4913
4914 // Move the interval right next to the top of the one it
4915 // intersects.
4916 assert(Interval.second > NewInterval.first &&
4917 "Invalid intervals configuration.");
4918 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4919 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4920 }
4921 return RetCycle;
4922}
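// Worked example (hypothetical, assuming a top-down IntervalBuilder that
// returns [C + AcquireAtCycle, C + ReleaseAtCycle)): with _Intervals =
// {[2, 5)}, CurrCycle = 1, AcquireAtCycle = 1 and ReleaseAtCycle = 3, the
// first candidate interval is [2, 4). It intersects [2, 5), so RetCycle is
// bumped by 5 - 2 = 3 to 4; the rebuilt interval [5, 7) is free and 4 is
// returned as the first cycle at which the resource can be booked.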
4923
4924void ResourceSegments::add(ResourceSegments::IntervalTy A,
4925 const unsigned CutOff) {
4926 assert(A.first <= A.second && "Cannot add negative resource usage");
4927 assert(CutOff > 0 && "0-size interval history has no use.");
4928 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4929 // instruction needed the resource to be available but does not use it.
4930 // However, ResourceSegment represents an interval that is closed on the left
4931 // and open on the right. It is impossible to represent an empty interval when
4932 // the left is closed. Do not add it to Intervals.
4933 if (A.first == A.second)
4934 return;
4935
4936 assert(all_of(_Intervals,
4937 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4938 return !intersects(A, Interval);
4939 }) &&
4940 "A resource is being overwritten");
4941 _Intervals.push_back(A);
4942
4943 sortAndMerge();
4944
4945 // Do not keep the full history of the intervals, just the
4946 // latest #CutOff.
4947 while (_Intervals.size() > CutOff)
4948 _Intervals.pop_front();
4949}
4950
4951bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4952 ResourceSegments::IntervalTy B) {
4953 assert(A.first <= A.second && "Invalid interval");
4954 assert(B.first <= B.second && "Invalid interval");
4955
4956 // Share one boundary.
4957 if ((A.first == B.first) || (A.second == B.second))
4958 return true;
4959
4960 // full intersect: [ *** ) B
4961 // [***) A
4962 if ((A.first > B.first) && (A.second < B.second))
4963 return true;
4964
4965 // right intersect: [ ***) B
4966 // [*** ) A
4967 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4968 return true;
4969
4970 // left intersect: [*** ) B
4971 // [ ***) A
4972 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4973 return true;
4974
4975 return false;
4976}
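// Examples: [4, 7) and [2, 5) intersect (right case), [2, 3) and [2, 5)
// share a left boundary, and [2, 5) does not intersect [5, 8) because the
// intervals are closed on the left and open on the right.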
4977
4978void ResourceSegments::sortAndMerge() {
4979 if (_Intervals.size() <= 1)
4980 return;
4981
4982 // First sort the collection.
4983 _Intervals.sort(sortIntervals);
4984
4985 // We can use std::next because the list holds at least two elements here.
4986 auto next = std::next(std::begin(_Intervals));
4987 auto E = std::end(_Intervals);
4988 for (; next != E; ++next) {
4989 if (std::prev(next)->second >= next->first) {
4990 next->first = std::prev(next)->first;
4991 _Intervals.erase(std::prev(next));
4992 continue;
4993 }
4994 }
4995}
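// Worked example (hypothetical): {[6, 8), [1, 3), [3, 5)} sorts to
// {[1, 3), [3, 5), [6, 8)}; since 3 >= 3 the first two are merged into
// [1, 5), leaving {[1, 5), [6, 8)}.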
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
Function Alias Analysis false
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, ArrayRef< InsnRange > Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
PostRA Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA=false)
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to the fast cluster algorithm at the loss " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource limited.
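The comparison is between a scaled resource count and a scaled latency; a sketch of the logic, assuming LFactor is the latency scaling factor supplied by the scheduling model (details may differ from the in-tree body):

    static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                                   unsigned Latency, bool AfterSchedNode) {
      // Positive when resource usage outweighs scaled latency.
      int ResCntFactor = (int)(Count - (Latency * LFactor));
      // After scheduling a node the comparison is strict, so ties favor latency.
      return AfterSchedNode ? ResCntFactor > (int)LFactor
                            : ResCntFactor >= (int)LFactor;
    }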
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
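Given that IntervalTy is a pair holding a half-open interval [a, b), the predicate plausibly orders intervals by their left endpoint; a sketch:

    static bool sortIntervals(const ResourceSegments::IntervalTy &A,
                              const ResourceSegments::IntervalTy &B) {
      return A.first < B.first; // order by start of interval
    }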
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
SmallVector< SchedRegion, 16 > MBBRegionsVector
static cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PriorityQueue class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Class recording the (high level) value of a variable.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
reverse_iterator rend() const
Definition ArrayRef.h:134
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
reverse_iterator rbegin() const
Definition ArrayRef.h:133
bool test(unsigned Idx) const
Definition BitVector.h:480
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
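Typical usage of the find/count/end interface listed above (a hypothetical snippet, not code from this file):

    #include "llvm/ADT/DenseMap.h"

    void denseMapExample() {
      llvm::DenseMap<unsigned, const char *> M;
      M[1] = "one";
      if (M.count(2) == 0) {
        // Key 2 is absent; count() returns 0 or 1.
      }
      auto It = M.find(1);
      if (It != M.end()) {
        // It->second == "one"
      }
    }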
Register getReg() const
ECValue - The EquivalenceClasses data structure is just a set of these.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist.
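A hypothetical usage of unionSets, illustrating how classes merge (getLeaderValue returns the class representative):

    #include "llvm/ADT/EquivalenceClasses.h"

    void unionFindExample() {
      llvm::EquivalenceClasses<unsigned> EC;
      EC.unionSets(1, 2); // class {1, 2}
      EC.unionSets(3, 4); // class {3, 4}
      EC.unionSets(2, 3); // all four values now share one class
      unsigned Leader = EC.getLeaderValue(4); // same as EC.getLeaderValue(1)
      (void)Leader;
    }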
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Segments::iterator iterator
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
iterator begin()
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endIndex - Return the maximum point of the whole range, exclusive.
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool hasValue() const
static LocationSize precise(uint64_t Value)
MachineInstrBundleIterator< const MachineInstr > const_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
Representation of each machine instruction.
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static LLVM_ABI MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI MachineSchedulerPass(const TargetMachine *TM)
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SchedCandidate TopCand
Candidate last picked from Top boundary.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
LLVM_ABI void dump(const TargetRegisterInfo &TRI) const
LLVM_ABI void addPressureChange(VirtRegOrUnit VRegOrUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
void clear()
clear - Erase all elements from the queue.
Helpers for implementing custom MachineSchedStrategy classes.
ArrayRef< SUnit * > elements()
LLVM_ABI void dump() const
std::vector< SUnit * >::iterator iterator
StringRef getName() const
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
LLVM_ABI void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
LLVM_ABI void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
LLVM_ABI void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These functions return the interval used by a resource in bottom and top scheduling.
static LLVM_ABI bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a, b).
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
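Because the intervals are closed on the left and open on the right, adjacent intervals such as [0, 3) and [3, 5) do not intersect. A sketch of the test under that convention (the in-tree ResourceSegments::intersects may differ in detail):

    #include <cstdint>
    #include <utility>

    using IntervalTy = std::pair<int64_t, int64_t>;

    // Two half-open intervals overlap iff each starts before the other ends.
    static bool intersects(IntervalTy A, IntervalTy B) {
      return A.first < B.second && B.first < A.second;
    }
    // intersects({0, 3}, {3, 5}) == false; intersects({0, 4}, {3, 5}) == true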
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
bool isWeak() const
Tests if this is a weak dependence.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition ScheduleDAG.h:76
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
@ Weak
Arbitrary weak DAG edge.
Definition ScheduleDAG.h:75
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Register getReg() const
Returns the register associated with this edge.
bool isArtificialDep() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned NumSuccsLeft
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned NumPredsLeft
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
unsigned WeakPredsLeft
bool isBottomReady() const
bool hasPhysRegUses
Has physreg uses.
bool isTopReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
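As an illustration of the SUnit/SDep interface above, a hypothetical helper (not part of this file) that finds the longest incoming data-dependence latency of a node:

    #include "llvm/CodeGen/ScheduleDAG.h"
    #include <algorithm>

    static unsigned maxIncomingDataLatency(const llvm::SUnit &SU) {
      unsigned Max = 0;
      for (const llvm::SDep &Pred : SU.Preds)
        if (Pred.getKind() == llvm::SDep::Data) // skip anti/output/order edges
          Max = std::max(Max, Pred.getLatency());
      return Max;
    }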
Each Scheduling boundary is associated with ready queues.
LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
LLVM_ABI void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
bool isReservedGroup(unsigned PIdx) const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execute.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
LLVM_ABI void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
LLVM_ABI void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
LLVM_ABI void dumpScheduledState() const
LLVM_ABI void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
Definition ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
A ScheduleDAG for scheduling lists of MachineInstr.
SmallVector< ClusterInfo > & getClusters()
Returns the array of the clusters.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
Dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph(const Twine &Name, const Twine &Title) override
viewGraph - Pop up a ghostview window with the reachable parts of the DAG rendered using 'dot'.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
std::reverse_iterator< const_iterator > const_reverse_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
const MCWriteProcResEntry * ProcResIter
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
LLVM_ABI unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic scheduling policy within a region.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
A raw_ostream that writes to an std::string.
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
ScheduleDAGMILive * createSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
@ Offset
Definition DWP.cpp:532
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
void stable_sort(R &&Range)
Definition STLExtras.h:2116
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition Format.h:157
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
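These range-based helpers wrap their std:: counterparts so callers can pass a container instead of a begin/end pair; hypothetical usage:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    void rangeHelpersExample() {
      std::vector<int> V = {3, 1, 2, 2};
      llvm::stable_sort(V); // V == {1, 2, 2, 3}
      bool AllPositive = llvm::all_of(V, [](int X) { return X > 0; }); // true
      int Sum = 0;
      for (int X : llvm::make_range(V.begin(), V.end())) // explicit sub-range
        Sum += X;
      (void)AllPositive;
      (void)Sum;
    }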
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition LaneBitmask.h:92
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
LLVM_ABI char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
ScheduleDAGMI * createSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
cl::opt< bool > ViewMISchedDAGs
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI cl::opt< bool > VerifyScheduling
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr unsigned InvalidClusterId
@ Other
Any other memory.
Definition ModRef.h:68
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition Format.h:150
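Both helpers return a FormattedString that pads when streamed to a raw_ostream, which is how the schedule-trace dump aligns its columns; a hypothetical example:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void justifyExample() {
      // "Res0" padded to 8 columns on the right, "42" to 5 on the left.
      llvm::errs() << llvm::left_justify("Res0", 8)
                   << llvm::right_justify("42", 5) << "\n";
    }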
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
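Given InvalidClusterId above, the check presumably reduces to a validity test plus an equality test; a sketch:

    // Cluster IDs match only when both are valid and equal.
    inline bool isTheSameCluster(unsigned A, unsigned B) {
      return A != llvm::InvalidClusterId && A == B;
    }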
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to load clustering.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
SmallPtrSet< SUnit *, 8 > ClusterInfo
Keep record of which SUnit are in the same cluster group.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI cl::opt< MISched::Direction > PreRADirection
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
cl::opt< bool > PrintDAGs
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
MachineBlockFrequencyInfo * MBFI
const MachineLoopInfo * MLI
const TargetMachine * TM
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
A region of an MBB for scheduling.
Summarize the unscheduled region.
LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.