MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
52#include "llvm/Config/llvm-config.h"
54#include "llvm/MC/LaneBitmask.h"
55#include "llvm/Pass.h"
58#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <limits>
68#include <memory>
69#include <string>
70#include <tuple>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76#define DEBUG_TYPE "machine-scheduler"
77
78STATISTIC(NumInstrsInSourceOrderPreRA,
79 "Number of instructions in source order after pre-RA scheduling");
80STATISTIC(NumInstrsInSourceOrderPostRA,
81 "Number of instructions in source order after post-RA scheduling");
82STATISTIC(NumInstrsScheduledPreRA,
83 "Number of instructions scheduled by pre-RA scheduler");
84STATISTIC(NumInstrsScheduledPostRA,
85 "Number of instructions scheduled by post-RA scheduler");
86STATISTIC(NumClustered, "Number of load/store pairs clustered");
87
88STATISTIC(NumTopPreRA,
89 "Number of scheduling units chosen from top queue pre-RA");
90STATISTIC(NumBotPreRA,
91 "Number of scheduling units chosen from bottom queue pre-RA");
92STATISTIC(NumNoCandPreRA,
93 "Number of scheduling units chosen for NoCand heuristic pre-RA");
94STATISTIC(NumOnly1PreRA,
95 "Number of scheduling units chosen for Only1 heuristic pre-RA");
96STATISTIC(NumPhysRegPreRA,
97 "Number of scheduling units chosen for PhysReg heuristic pre-RA");
98STATISTIC(NumRegExcessPreRA,
99 "Number of scheduling units chosen for RegExcess heuristic pre-RA");
100STATISTIC(NumRegCriticalPreRA,
101 "Number of scheduling units chosen for RegCritical heuristic pre-RA");
102STATISTIC(NumStallPreRA,
103 "Number of scheduling units chosen for Stall heuristic pre-RA");
104STATISTIC(NumClusterPreRA,
105 "Number of scheduling units chosen for Cluster heuristic pre-RA");
106STATISTIC(NumWeakPreRA,
107 "Number of scheduling units chosen for Weak heuristic pre-RA");
108STATISTIC(NumRegMaxPreRA,
109 "Number of scheduling units chosen for RegMax heuristic pre-RA");
110STATISTIC(
111 NumResourceReducePreRA,
112 "Number of scheduling units chosen for ResourceReduce heuristic pre-RA");
113STATISTIC(
114 NumResourceDemandPreRA,
115 "Number of scheduling units chosen for ResourceDemand heuristic pre-RA");
116STATISTIC(
117 NumTopDepthReducePreRA,
118 "Number of scheduling units chosen for TopDepthReduce heuristic pre-RA");
119STATISTIC(
120 NumTopPathReducePreRA,
121 "Number of scheduling units chosen for TopPathReduce heuristic pre-RA");
122STATISTIC(
123 NumBotHeightReducePreRA,
124 "Number of scheduling units chosen for BotHeightReduce heuristic pre-RA");
125STATISTIC(
126 NumBotPathReducePreRA,
127 "Number of scheduling units chosen for BotPathReduce heuristic pre-RA");
128STATISTIC(NumNodeOrderPreRA,
129 "Number of scheduling units chosen for NodeOrder heuristic pre-RA");
130STATISTIC(NumFirstValidPreRA,
131 "Number of scheduling units chosen for FirstValid heuristic pre-RA");
132
133STATISTIC(NumTopPostRA,
134 "Number of scheduling units chosen from top queue post-RA");
135STATISTIC(NumBotPostRA,
136 "Number of scheduling units chosen from bottom queue post-RA");
137STATISTIC(NumNoCandPostRA,
138 "Number of scheduling units chosen for NoCand heuristic post-RA");
139STATISTIC(NumOnly1PostRA,
140 "Number of scheduling units chosen for Only1 heuristic post-RA");
141STATISTIC(NumPhysRegPostRA,
142 "Number of scheduling units chosen for PhysReg heuristic post-RA");
143STATISTIC(NumRegExcessPostRA,
144 "Number of scheduling units chosen for RegExcess heuristic post-RA");
145STATISTIC(
146 NumRegCriticalPostRA,
147 "Number of scheduling units chosen for RegCritical heuristic post-RA");
148STATISTIC(NumStallPostRA,
149 "Number of scheduling units chosen for Stall heuristic post-RA");
150STATISTIC(NumClusterPostRA,
151 "Number of scheduling units chosen for Cluster heuristic post-RA");
152STATISTIC(NumWeakPostRA,
153 "Number of scheduling units chosen for Weak heuristic post-RA");
154STATISTIC(NumRegMaxPostRA,
155 "Number of scheduling units chosen for RegMax heuristic post-RA");
156STATISTIC(
157 NumResourceReducePostRA,
158 "Number of scheduling units chosen for ResourceReduce heuristic post-RA");
159STATISTIC(
160 NumResourceDemandPostRA,
161 "Number of scheduling units chosen for ResourceDemand heuristic post-RA");
162STATISTIC(
163 NumTopDepthReducePostRA,
164 "Number of scheduling units chosen for TopDepthReduce heuristic post-RA");
165STATISTIC(
166 NumTopPathReducePostRA,
167 "Number of scheduling units chosen for TopPathReduce heuristic post-RA");
168STATISTIC(
169 NumBotHeightReducePostRA,
170 "Number of scheduling units chosen for BotHeightReduce heuristic post-RA");
171STATISTIC(
172 NumBotPathReducePostRA,
173 "Number of scheduling units chosen for BotPathReduce heuristic post-RA");
174STATISTIC(NumNodeOrderPostRA,
175 "Number of scheduling units chosen for NodeOrder heuristic post-RA");
176STATISTIC(NumFirstValidPostRA,
177 "Number of scheduling units chosen for FirstValid heuristic post-RA");
178
180 "misched-prera-direction", cl::Hidden,
181 cl::desc("Pre reg-alloc list scheduling direction"),
184 clEnumValN(MISched::TopDown, "topdown",
185 "Force top-down pre reg-alloc list scheduling"),
186 clEnumValN(MISched::BottomUp, "bottomup",
187 "Force bottom-up pre reg-alloc list scheduling"),
188 clEnumValN(MISched::Bidirectional, "bidirectional",
189 "Force bidirectional pre reg-alloc list scheduling")));
190
192 "misched-postra-direction", cl::Hidden,
193 cl::desc("Post reg-alloc list scheduling direction"),
196 clEnumValN(MISched::TopDown, "topdown",
197 "Force top-down post reg-alloc list scheduling"),
198 clEnumValN(MISched::BottomUp, "bottomup",
199 "Force bottom-up post reg-alloc list scheduling"),
200 clEnumValN(MISched::Bidirectional, "bidirectional",
201 "Force bidirectional post reg-alloc list scheduling")));
202
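
// Illustrative usage (editorial addition, not part of the original source):
// these direction flags can be passed to any tool that runs the machine
// scheduler, e.g. llc. A minimal sketch of a command line:
//
//   llc -O2 -misched-prera-direction=bottomup \
//       -misched-postra-direction=topdown foo.ll
//
// When the flags are left unset, each region falls back to the direction
// chosen by the scheduling policy for that region.
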
203static cl::opt<bool>
205 cl::desc("Print critical path length to stdout"));
206
208 "verify-misched", cl::Hidden,
209 cl::desc("Verify machine instrs before and after machine scheduling"));
210
211#ifndef NDEBUG
212cl::opt<bool> llvm::ViewMISchedDAGs(
213 "view-misched-dags", cl::Hidden,
214 cl::desc("Pop up a window to show MISched dags after they are processed"));
215cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden,
216 cl::desc("Print schedule DAGs"));
217static cl::opt<bool> MISchedDumpReservedCycles(
218 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
219 cl::desc("Dump resource usage at schedule boundary."));
220static cl::opt<bool> MischedDetailResourceBooking(
221 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
222 cl::desc("Show details of invoking getNextResourceCycle."));
223#else
224const bool llvm::ViewMISchedDAGs = false;
225const bool llvm::PrintDAGs = false;
226static const bool MischedDetailResourceBooking = false;
227#ifdef LLVM_ENABLE_DUMP
228static const bool MISchedDumpReservedCycles = false;
229#endif // LLVM_ENABLE_DUMP
230#endif // NDEBUG
231
232#ifndef NDEBUG
233/// In some situations a few uninteresting nodes depend on nearly all other
234/// nodes in the graph; provide a cutoff to hide them.
235static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
236 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
237
239 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
240
242 cl::desc("Only schedule this function"));
243static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
244 cl::desc("Only schedule this MBB#"));
245#endif // NDEBUG
246
247/// Avoid quadratic complexity in unusually large basic blocks by limiting the
248/// size of the ready lists.
250 cl::desc("Limit ready list to N instructions"), cl::init(256));
251
252static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
253 cl::desc("Enable register pressure scheduling."), cl::init(true));
254
255static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
256 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
257
259 cl::desc("Enable memop clustering."),
260 cl::init(true));
261static cl::opt<bool>
262 ForceFastCluster("force-fast-cluster", cl::Hidden,
263 cl::desc("Switch to fast cluster algorithm with the lost "
264 "of some fusion opportunities"),
265 cl::init(false));
267 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
268 cl::desc("The threshold for fast cluster"),
269 cl::init(1000));
270
271#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
273 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
274 cl::desc("Dump resource usage at schedule boundary."));
276 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
277 cl::desc("Set width of the columns with "
278 "the resources and schedule units"),
279 cl::init(19));
281 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
282 cl::desc("Set width of the columns showing resource booking."),
283 cl::init(5));
285 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
286 cl::desc("Sort the resources printed in the dump trace"));
287#endif
288
290 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
291 cl::desc("Number of intervals to track"), cl::init(10));
292
293// DAG subtrees must have at least this many nodes.
294static const unsigned MinSubtreeSize = 8;
295
296// Pin the vtables to this file.
297void MachineSchedStrategy::anchor() {}
298
299void ScheduleDAGMutation::anchor() {}
300
301//===----------------------------------------------------------------------===//
302// Machine Instruction Scheduling Pass and Registry
303//===----------------------------------------------------------------------===//
304
308
312
313namespace llvm {
314namespace impl_detail {
315
316/// Base class for the machine scheduler classes.
318protected:
319 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
320};
321
322/// Impl class for MachineScheduler.
324 // These are only for using MF.verify()
325 // remove when verify supports passing in all analyses
326 MachineFunctionPass *P = nullptr;
327 MachineFunctionAnalysisManager *MFAM = nullptr;
328
329public:
336
338 // Migration only
339 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
340 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
341
342 bool run(MachineFunction &MF, const TargetMachine &TM,
343 const RequiredAnalyses &Analyses);
344
345protected:
347};
348
349/// Impl class for PostMachineScheduler.
351 // These are only for using MF.verify()
352 // remove when verify supports passing in all analyses
353 MachineFunctionPass *P = nullptr;
354 MachineFunctionAnalysisManager *MFAM = nullptr;
355
356public:
362 // Migration only
363 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
364 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
365
366 bool run(MachineFunction &Func, const TargetMachine &TM,
367 const RequiredAnalyses &Analyses);
368
369protected:
371};
372
373} // namespace impl_detail
374} // namespace llvm
375
379
380namespace {
381/// MachineScheduler runs after coalescing and before register allocation.
382class MachineSchedulerLegacy : public MachineFunctionPass {
383 MachineSchedulerImpl Impl;
384
385public:
386 MachineSchedulerLegacy();
387 void getAnalysisUsage(AnalysisUsage &AU) const override;
388 bool runOnMachineFunction(MachineFunction&) override;
389
390 static char ID; // Class identification, replacement for typeinfo
391};
392
393/// PostMachineScheduler runs shortly before code emission.
394class PostMachineSchedulerLegacy : public MachineFunctionPass {
395 PostMachineSchedulerImpl Impl;
396
397public:
398 PostMachineSchedulerLegacy();
399 void getAnalysisUsage(AnalysisUsage &AU) const override;
400 bool runOnMachineFunction(MachineFunction &) override;
401
402 static char ID; // Class identification, replacement for typeinfo
403};
404
405} // end anonymous namespace
406
407char MachineSchedulerLegacy::ID = 0;
408
409char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;
410
411INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
412 "Machine Instruction Scheduler", false, false)
418INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
419 "Machine Instruction Scheduler", false, false)
420
421MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
423}
424
425void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
426 AU.setPreservesCFG();
436}
437
438char PostMachineSchedulerLegacy::ID = 0;
439
440char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;
441
442INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
443 "PostRA Machine Instruction Scheduler", false, false)
447INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
448 "PostRA Machine Instruction Scheduler", false, false)
449
450PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
453}
454
455void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
456 AU.setPreservesCFG();
462}
463
466
467/// A dummy default scheduler factory indicates whether the scheduler
468/// is overridden on the command line.
472
473/// MachineSchedOpt allows command line selection of the scheduler.
478 cl::desc("Machine instruction scheduler to use"));
479
481DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
483
485 "enable-misched",
486 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
487 cl::Hidden);
488
490 "enable-post-misched",
491 cl::desc("Enable the post-ra machine instruction scheduling pass."),
492 cl::init(true), cl::Hidden);
493
494/// Decrement this iterator until reaching the top or a non-debug instr.
498 assert(I != Beg && "reached the top of the region, cannot decrement");
499 while (--I != Beg) {
500 if (!I->isDebugOrPseudoInstr())
501 break;
502 }
503 return I;
504}
505
506/// Non-const version.
513
514/// If this iterator is a debug value, increment until reaching the End or a
515/// non-debug instruction.
519 for(; I != End; ++I) {
520 if (!I->isDebugOrPseudoInstr())
521 break;
522 }
523 return I;
524}
525
526/// Non-const version.
533
534/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
536 // Select the scheduler, or set the default.
538 if (Ctor != useDefaultMachineSched)
539 return Ctor(this);
540
541 // Get the default scheduler set by the target for this function.
542 ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
543 if (Scheduler)
544 return Scheduler;
545
546 // Default to GenericScheduler.
547 return createSchedLive(this);
548}
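
// Illustrative sketch (editorial addition, not part of the original source):
// the command-line override consulted above comes from MachineSchedRegistry.
// A target or plugin can make its own strategy selectable via -misched=<name>
// roughly as follows; "my-sched" and createMySched are hypothetical names, and
// the block is disabled so it only serves as an example.
#if 0
static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  // Reuse the live-interval-preserving DAG driver with the generic strategy.
  return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
}
static MachineSchedRegistry
    MySchedRegistry("my-sched", "Hypothetical example scheduler",
                    createMySched);
#endif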
549
551 const RequiredAnalyses &Analyses) {
552 MF = &Func;
553 MLI = &Analyses.MLI;
554 MDT = &Analyses.MDT;
555 this->TM = &TM;
556 AA = &Analyses.AA;
557 LIS = &Analyses.LIS;
558
559 if (VerifyScheduling) {
560 LLVM_DEBUG(LIS->dump());
561 const char *MSchedBanner = "Before machine scheduling.";
562 if (P)
563 MF->verify(P, MSchedBanner, &errs());
564 else
565 MF->verify(*MFAM, MSchedBanner, &errs());
566 }
567 RegClassInfo->runOnMachineFunction(*MF);
568
569 // Instantiate the selected scheduler for this target, function, and
570 // optimization level.
571 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
572 scheduleRegions(*Scheduler, false);
573
574 LLVM_DEBUG(LIS->dump());
575 if (VerifyScheduling) {
576 const char *MSchedBanner = "After machine scheduling.";
577 if (P)
578 MF->verify(P, MSchedBanner, &errs());
579 else
580 MF->verify(*MFAM, MSchedBanner, &errs());
581 }
582 return true;
583}
584
585/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
586/// the caller. We don't have a command line option to override the postRA
587/// scheduler. The Target must configure it.
589 // Get the postRA scheduler set by the target for this function.
590 ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
591 if (Scheduler)
592 return Scheduler;
593
594 // Default to GenericScheduler.
595 return createSchedPostRA(this);
596}
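
// Illustrative sketch (editorial addition, not part of the original source): a
// target that overrides TargetMachine::createPostMachineScheduler() typically
// returns a ScheduleDAGMI driven by its own MachineSchedStrategy or DAG
// mutations. A minimal sketch, assuming the target only wants the generic
// post-RA strategy plus a hypothetical mutation factory
// createMyTargetDAGMutation(); the block is disabled and purely illustrative.
#if 0
static ScheduleDAGInstrs *createMyTargetPostRASched(MachineSchedContext *C) {
  ScheduleDAGMI *DAG = new ScheduleDAGMI(
      C, std::make_unique<PostGenericScheduler>(C), /*RemoveKillFlags=*/true);
  DAG->addMutation(createMyTargetDAGMutation());
  return DAG;
}
#endif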
597
599 const TargetMachine &TM,
600 const RequiredAnalyses &Analyses) {
601 MF = &Func;
602 MLI = &Analyses.MLI;
603 this->TM = &TM;
604 AA = &Analyses.AA;
605
606 if (VerifyScheduling) {
607 const char *PostMSchedBanner = "Before post machine scheduling.";
608 if (P)
609 MF->verify(P, PostMSchedBanner, &errs());
610 else
611 MF->verify(*MFAM, PostMSchedBanner, &errs());
612 }
613
614 // Instantiate the selected scheduler for this target, function, and
615 // optimization level.
616 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
618
619 if (VerifyScheduling) {
620 const char *PostMSchedBanner = "After post machine scheduling.";
621 if (P)
622 MF->verify(P, PostMSchedBanner, &errs());
623 else
624 MF->verify(*MFAM, PostMSchedBanner, &errs());
625 }
626 return true;
627}
628
629/// Top-level MachineScheduler pass driver.
630///
631/// Visit blocks in function order. Divide each block into scheduling regions
632/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
633/// consistent with the DAG builder, which traverses the interior of the
634/// scheduling regions bottom-up.
635///
636/// This design avoids exposing scheduling boundaries to the DAG builder,
637/// simplifying the DAG builder's support for "special" target instructions.
638/// At the same time the design allows target schedulers to operate across
639/// scheduling boundaries, for example to bundle the boundary instructions
640/// without reordering them. This creates complexity, because the target
641/// scheduler must update the RegionBegin and RegionEnd positions cached by
642/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
643/// design would be to split blocks at scheduling boundaries, but LLVM has a
644/// general bias against block splitting purely for implementation simplicity.
645bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
646 if (skipFunction(MF.getFunction()))
647 return false;
648
649 if (EnableMachineSched.getNumOccurrences()) {
650 if (!EnableMachineSched)
651 return false;
652 } else if (!MF.getSubtarget().enableMachineScheduler()) {
653 return false;
654 }
655
656 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
657
658 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
659 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
660 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
661 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
662 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
663 Impl.setLegacyPass(this);
664 return Impl.run(MF, TM, {MLI, MDT, AA, LIS});
665}
666
668 : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
671 default;
672
674 : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
676 PostMachineSchedulerPass &&Other) = default;
678
682 if (EnableMachineSched.getNumOccurrences()) {
683 if (!EnableMachineSched)
684 return PreservedAnalyses::all();
685 } else if (!MF.getSubtarget().enableMachineScheduler()) {
686 return PreservedAnalyses::all();
687 }
688
689 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
690 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
691 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
693 .getManager();
694 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
695 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
696 Impl->setMFAM(&MFAM);
697 bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS});
698 if (!Changed)
699 return PreservedAnalyses::all();
700
703 .preserve<SlotIndexesAnalysis>()
704 .preserve<LiveIntervalsAnalysis>();
705}
706
707bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
708 if (skipFunction(MF.getFunction()))
709 return false;
710
711 if (EnablePostRAMachineSched.getNumOccurrences()) {
712 if (!EnablePostRAMachineSched)
713 return false;
714 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
715 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
716 return false;
717 }
718 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
719 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
720 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
721 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
722 Impl.setLegacyPass(this);
723 return Impl.run(MF, TM, {MLI, AA});
724}
725
729 if (EnablePostRAMachineSched.getNumOccurrences()) {
730 if (!EnablePostRAMachineSched)
731 return PreservedAnalyses::all();
732 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
733 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
734 return PreservedAnalyses::all();
735 }
736 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
737 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
739 .getManager();
740 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
741
742 Impl->setMFAM(&MFAM);
743 bool Changed = Impl->run(MF, *TM, {MLI, AA});
744 if (!Changed)
745 return PreservedAnalyses::all();
746
749 return PA;
750}
751
752/// Return true if the given instruction should not be included in a scheduling
753/// region.
754///
755/// MachineScheduler does not currently support scheduling across calls. To
756/// handle calls, the DAG builder needs to be modified to create register
757/// anti/output dependencies on the registers clobbered by the call's regmask
758/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
759/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
760/// the boundary, but there would be no benefit to postRA scheduling across
761/// calls this late anyway.
764 MachineFunction *MF,
765 const TargetInstrInfo *TII) {
766 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
767 MI->isFakeUse();
768}
769
771
772static void
774 MBBRegionsVector &Regions,
775 bool RegionsTopDown) {
776 MachineFunction *MF = MBB->getParent();
778
780 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
781 RegionEnd != MBB->begin(); RegionEnd = I) {
782
783 // Avoid decrementing RegionEnd for blocks with no terminator.
784 if (RegionEnd != MBB->end() ||
785 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
786 --RegionEnd;
787 }
788
789 // The next region starts above the previous region. Look backward in the
790 // instruction stream until we find the nearest boundary.
791 unsigned NumRegionInstrs = 0;
792 I = RegionEnd;
793 for (;I != MBB->begin(); --I) {
794 MachineInstr &MI = *std::prev(I);
795 if (isSchedBoundary(&MI, &*MBB, MF, TII))
796 break;
797 if (!MI.isDebugOrPseudoInstr()) {
798 // MBB::size() uses instr_iterator to count. Here we need a bundle to
799 // count as a single instruction.
800 ++NumRegionInstrs;
801 }
802 }
803
804 // It's possible we found a scheduling region that only has debug
805 // instructions. Don't bother scheduling these.
806 if (NumRegionInstrs != 0)
807 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
808 }
809
810 if (RegionsTopDown)
811 std::reverse(Regions.begin(), Regions.end());
812}
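
// For illustration (editorial addition, not part of the original source):
// given a block such as
//
//   %1 = LOAD ...
//   %2 = ADD %1, ...
//   CALL @f            ; scheduling boundary (isSchedBoundary returns true)
//   %3 = MUL %2, ...
//   RET                ; terminator, also a scheduling boundary
//
// the walk above produces two regions: one containing just the MUL (between
// the CALL and the RET) and one containing the LOAD and ADD (above the CALL).
// The boundary instructions themselves are never reordered. With
// RegionsTopDown == false the regions are left in the bottom-up order in which
// they were discovered.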
813
814/// Main driver for both MachineScheduler and PostMachineScheduler.
816 bool FixKillFlags) {
817 // Visit all machine basic blocks.
818 //
819 // TODO: Visit blocks in global postorder or postorder within the bottom-up
820 // loop tree. Then we can optionally compute global RegPressure.
821 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
822 MBB != MBBEnd; ++MBB) {
823
824 Scheduler.startBlock(&*MBB);
825
826#ifndef NDEBUG
827 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
828 continue;
829 if (SchedOnlyBlock.getNumOccurrences()
830 && (int)SchedOnlyBlock != MBB->getNumber())
831 continue;
832#endif
833
834 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
835 // points to the scheduling boundary at the bottom of the region. The DAG
836 // does not include RegionEnd, but the region does (i.e. the next
837 // RegionEnd is above the previous RegionBegin). If the current block has
838 // no terminator then RegionEnd == MBB->end() for the bottom region.
839 //
840 // All the regions of MBB are first found and stored in MBBRegions; they
841 // are then processed top-down when doMBBSchedRegionsTopDown() returns true.
842 //
843 // The Scheduler may insert instructions during either schedule() or
844 // exitRegion(), even for empty regions. So the local iterators 'I' and
845 // 'RegionEnd' are invalid across these calls. Instructions must not be
846 // added to other regions than the current one without updating MBBRegions.
847
848 MBBRegionsVector MBBRegions;
849 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
850 bool ScheduleSingleMI = Scheduler.shouldScheduleSingleMIRegions();
851 for (const SchedRegion &R : MBBRegions) {
852 MachineBasicBlock::iterator I = R.RegionBegin;
853 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
854 unsigned NumRegionInstrs = R.NumRegionInstrs;
855
856 // Notify the scheduler of the region, even if we may skip scheduling
857 // it. Perhaps it still needs to be bundled.
858 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
859
860 // Skip empty scheduling regions and, conditionally, regions with a single
861 // MI.
862 if (I == RegionEnd || (!ScheduleSingleMI && I == std::prev(RegionEnd))) {
863 // Close the current region. Bundle the terminator if needed.
864 // This invalidates 'RegionEnd' and 'I'.
865 Scheduler.exitRegion();
866 continue;
867 }
868 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
869 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
870 << " " << MBB->getName() << "\n From: " << *I
871 << " To: ";
872 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
873 else dbgs() << "End\n";
874 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
876 errs() << MF->getName();
877 errs() << ":%bb. " << MBB->getNumber();
878 errs() << " " << MBB->getName() << " \n";
879 }
880
881 // Schedule a region: possibly reorder instructions.
882 // This invalidates the original region iterators.
883 Scheduler.schedule();
884
885 // Close the current region.
886 Scheduler.exitRegion();
887 }
888 Scheduler.finishBlock();
889 // FIXME: Ideally, no further passes should rely on kill flags. However,
890 // thumb2 size reduction is currently an exception, so the PostMIScheduler
891 // needs to do this.
892 if (FixKillFlags)
893 Scheduler.fixupKills(*MBB);
894 }
895 Scheduler.finalizeSchedule();
896}
897
898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
900 dbgs() << "Queue " << Name << ": ";
901 for (const SUnit *SU : Queue)
902 dbgs() << SU->NodeNum << " ";
903 dbgs() << "\n";
904}
905#endif
906
907//===----------------------------------------------------------------------===//
908// ScheduleDAGMI - Basic machine instruction scheduling. This is
909// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
910// virtual registers.
911// ===----------------------------------------------------------------------===/
912
913// Provide a vtable anchor.
915
916/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
917/// NumPredsLeft reaches zero, release the successor node.
918///
919/// FIXME: Adjust SuccSU height based on MinLatency.
921 SUnit *SuccSU = SuccEdge->getSUnit();
922
923 if (SuccEdge->isWeak()) {
924 --SuccSU->WeakPredsLeft;
925 return;
926 }
927#ifndef NDEBUG
928 if (SuccSU->NumPredsLeft == 0) {
929 dbgs() << "*** Scheduling failed! ***\n";
930 dumpNode(*SuccSU);
931 dbgs() << " has been released too many times!\n";
932 llvm_unreachable(nullptr);
933 }
934#endif
935 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
936 // CurrCycle may have advanced since then.
937 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
938 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
939
940 --SuccSU->NumPredsLeft;
941 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
942 SchedImpl->releaseTopNode(SuccSU);
943}
944
945/// releaseSuccessors - Call releaseSucc on each of SU's successors.
947 for (SDep &Succ : SU->Succs)
948 releaseSucc(SU, &Succ);
949}
950
951/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
952/// NumSuccsLeft reaches zero, release the predecessor node.
953///
954/// FIXME: Adjust PredSU height based on MinLatency.
956 SUnit *PredSU = PredEdge->getSUnit();
957
958 if (PredEdge->isWeak()) {
959 --PredSU->WeakSuccsLeft;
960 return;
961 }
962#ifndef NDEBUG
963 if (PredSU->NumSuccsLeft == 0) {
964 dbgs() << "*** Scheduling failed! ***\n";
965 dumpNode(*PredSU);
966 dbgs() << " has been released too many times!\n";
967 llvm_unreachable(nullptr);
968 }
969#endif
970 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
971 // CurrCycle may have advanced since then.
972 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
973 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
974
975 --PredSU->NumSuccsLeft;
976 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
977 SchedImpl->releaseBottomNode(PredSU);
978}
979
980/// releasePredecessors - Call releasePred on each of SU's predecessors.
982 for (SDep &Pred : SU->Preds)
983 releasePred(SU, &Pred);
984}
985
990
995
996/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
997/// after crossing a scheduling boundary. [begin, end) includes all instructions
998/// in the region, including the boundary itself and single-instruction regions
999/// that don't get scheduled.
1003 unsigned regioninstrs)
1004{
1005 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
1006
1007 SchedImpl->initPolicy(begin, end, regioninstrs);
1008
1009 // Set dump direction after initializing sched policy.
1011 if (SchedImpl->getPolicy().OnlyTopDown)
1013 else if (SchedImpl->getPolicy().OnlyBottomUp)
1015 else
1018}
1019
1020/// This is normally called from the main scheduler loop but may also be invoked
1021/// by the scheduling strategy to perform additional code motion.
1024 // Advance RegionBegin if the first instruction moves down.
1025 if (&*RegionBegin == MI)
1026 ++RegionBegin;
1027
1028 // Update the instruction stream.
1029 BB->splice(InsertPos, BB, MI);
1030
1031 // Update LiveIntervals
1032 if (LIS)
1033 LIS->handleMove(*MI, /*UpdateFlags=*/true);
1034
1035 // Recede RegionBegin if an instruction moves above the first.
1036 if (RegionBegin == InsertPos)
1037 RegionBegin = MI;
1038}
1039
1041#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
1042 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
1044 return false;
1045 }
1046 ++NumInstrsScheduled;
1047#endif
1048 return true;
1049}
1050
1051/// Per-region scheduling driver, called back from
1052/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
1053/// that does not consider liveness or register pressure. It is useful for
1054/// PostRA scheduling and potentially other custom schedulers.
1056 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
1057 LLVM_DEBUG(SchedImpl->dumpPolicy());
1058
1059 // Build the DAG.
1061
1063
1064 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1065 findRootsAndBiasEdges(TopRoots, BotRoots);
1066
1067 LLVM_DEBUG(dump());
1068 if (PrintDAGs) dump();
1070
1071 // Initialize the strategy before modifying the DAG.
1072 // This may initialize a DFSResult to be used for queue priority.
1073 SchedImpl->initialize(this);
1074
1075 // Initialize ready queues now that the DAG and priority data are finalized.
1076 initQueues(TopRoots, BotRoots);
1077
1078 bool IsTopNode = false;
1079 while (true) {
1080 if (!checkSchedLimit())
1081 break;
1082
1083 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
1084 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1085 if (!SU) break;
1086
1087 assert(!SU->isScheduled && "Node already scheduled");
1088
1089 MachineInstr *MI = SU->getInstr();
1090 if (IsTopNode) {
1091 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1092 if (&*CurrentTop == MI)
1094 else
1096 } else {
1097 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1100 if (&*priorII == MI)
1101 CurrentBottom = priorII;
1102 else {
1103 if (&*CurrentTop == MI)
1104 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1106 CurrentBottom = MI;
1107 }
1108 }
1109 // Notify the scheduling strategy before updating the DAG.
1110 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
1111 // runs, it can then use the accurate ReadyCycle time to determine whether
1112 // newly released nodes can move to the readyQ.
1113 SchedImpl->schedNode(SU, IsTopNode);
1114
1115 updateQueues(SU, IsTopNode);
1116 }
1117 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1118
1120
1121 LLVM_DEBUG({
1122 dbgs() << "*** Final schedule for "
1123 << printMBBReference(*begin()->getParent()) << " ***\n";
1124 dumpSchedule();
1125 dbgs() << '\n';
1126 });
1127}
1128
1129/// Apply each ScheduleDAGMutation step in order.
1131 for (auto &m : Mutations)
1132 m->apply(this);
1133}
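
// Illustrative sketch (editorial addition, not part of the original source):
// mutations are normally attached when the DAG is created, before schedule()
// runs; postprocessDAG() above then applies every attached mutation in order.
// For example, a scheduler factory could request load/store clustering like
// this; the surrounding factory is hypothetical and the block is disabled.
#if 0
static ScheduleDAGInstrs *createClusteringSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}
#endif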
1134
1137 SmallVectorImpl<SUnit*> &BotRoots) {
1138 for (SUnit &SU : SUnits) {
1139 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
1140
1141 // Order predecessors so DFSResult follows the critical path.
1142 SU.biasCriticalPath();
1143
1144 // A SUnit is ready to top schedule if it has no predecessors.
1145 if (!SU.NumPredsLeft)
1146 TopRoots.push_back(&SU);
1147 // A SUnit is ready to bottom schedule if it has no successors.
1148 if (!SU.NumSuccsLeft)
1149 BotRoots.push_back(&SU);
1150 }
1151 ExitSU.biasCriticalPath();
1152}
1153
1154/// Identify DAG roots and setup scheduler queues.
1156 ArrayRef<SUnit *> BotRoots) {
1157 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
1158 //
1159 // Nodes with unreleased weak edges can still be roots.
1160 // Release top roots in forward order.
1161 for (SUnit *SU : TopRoots)
1162 SchedImpl->releaseTopNode(SU);
1163
1164 // Release bottom roots in reverse order so the higher priority nodes appear
1165 // first. This is more natural and slightly more efficient.
1167 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
1168 SchedImpl->releaseBottomNode(*I);
1169 }
1170
1173
1174 SchedImpl->registerRoots();
1175
1176 // Advance past initial DebugValues.
1179}
1180
1181/// Update scheduler queues after scheduling an instruction.
1182void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
1183 // Release dependent instructions for scheduling.
1184 if (IsTopNode)
1186 else
1188
1189 SU->isScheduled = true;
1190}
1191
1192/// Reinsert any remaining debug_values, just like the PostRA scheduler.
1194 // If first instruction was a DBG_VALUE then put it back.
1195 if (FirstDbgValue) {
1196 BB->splice(RegionBegin, BB, FirstDbgValue);
1198 }
1199
1200 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
1201 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
1202 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
1203 MachineInstr *DbgValue = P.first;
1204 MachineBasicBlock::iterator OrigPrevMI = P.second;
1205 if (&*RegionBegin == DbgValue)
1206 ++RegionBegin;
1207 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
1208 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
1210 }
1211}
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214static const char *scheduleTableLegend = " i: issue\n x: resource booked";
1215
1217 // Bail out when there is no schedule model to query.
1218 if (!SchedModel.hasInstrSchedModel())
1219 return;
1220
1221 // Nothing to show if there is no or just one instruction.
1222 if (BB->size() < 2)
1223 return;
1224
1225 dbgs() << " * Schedule table (TopDown):\n";
1226 dbgs() << scheduleTableLegend << "\n";
1227 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1228 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1229 for (MachineInstr &MI : *this) {
1230 SUnit *SU = getSUnit(&MI);
1231 if (!SU)
1232 continue;
1233 const MCSchedClassDesc *SC = getSchedClass(SU);
1234 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1235 PE = SchedModel.getWriteProcResEnd(SC);
1236 PI != PE; ++PI) {
1237 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1238 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1239 }
1240 }
1241 // Print the header with the cycles
1242 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1243 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1244 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1245 dbgs() << "|\n";
1246
1247 for (MachineInstr &MI : *this) {
1248 SUnit *SU = getSUnit(&MI);
1249 if (!SU) {
1250 dbgs() << "Missing SUnit\n";
1251 continue;
1252 }
1253 std::string NodeName("SU(");
1254 NodeName += std::to_string(SU->NodeNum) + ")";
1255 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1256 unsigned C = FirstCycle;
1257 for (; C <= LastCycle; ++C) {
1258 if (C == SU->TopReadyCycle)
1259 dbgs() << llvm::left_justify("| i", ColWidth);
1260 else
1261 dbgs() << llvm::left_justify("|", ColWidth);
1262 }
1263 dbgs() << "|\n";
1264 const MCSchedClassDesc *SC = getSchedClass(SU);
1265
1267 make_range(SchedModel.getWriteProcResBegin(SC),
1268 SchedModel.getWriteProcResEnd(SC)));
1269
1272 ResourcesIt,
1273 [](const MCWriteProcResEntry &LHS,
1274 const MCWriteProcResEntry &RHS) -> bool {
1275 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1276 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1277 });
1278 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1279 C = FirstCycle;
1280 const std::string ResName =
1281 SchedModel.getResourceName(PI.ProcResourceIdx);
1282 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1283 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1284 dbgs() << llvm::left_justify("|", ColWidth);
1285 }
1286 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1287 ++I, ++C)
1288 dbgs() << llvm::left_justify("| x", ColWidth);
1289 while (C++ <= LastCycle)
1290 dbgs() << llvm::left_justify("|", ColWidth);
1291 // Place end char
1292 dbgs() << "| \n";
1293 }
1294 }
1295}
1296
1298 // Bail out when there is no schedule model to query.
1299 if (!SchedModel.hasInstrSchedModel())
1300 return;
1301
1302 // Nothing to show if there is no or just one instruction.
1303 if (BB->size() < 2)
1304 return;
1305
1306 dbgs() << " * Schedule table (BottomUp):\n";
1307 dbgs() << scheduleTableLegend << "\n";
1308
1309 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1310 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1311 for (MachineInstr &MI : *this) {
1312 SUnit *SU = getSUnit(&MI);
1313 if (!SU)
1314 continue;
1315 const MCSchedClassDesc *SC = getSchedClass(SU);
1316 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1317 PE = SchedModel.getWriteProcResEnd(SC);
1318 PI != PE; ++PI) {
1319 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1320 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1321 }
1322 }
1323 // Print the header with the cycles
1324 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1325 for (int C = FirstCycle; C >= LastCycle; --C)
1326 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1327 dbgs() << "|\n";
1328
1329 for (MachineInstr &MI : *this) {
1330 SUnit *SU = getSUnit(&MI);
1331 if (!SU) {
1332 dbgs() << "Missing SUnit\n";
1333 continue;
1334 }
1335 std::string NodeName("SU(");
1336 NodeName += std::to_string(SU->NodeNum) + ")";
1337 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1338 int C = FirstCycle;
1339 for (; C >= LastCycle; --C) {
1340 if (C == (int)SU->BotReadyCycle)
1341 dbgs() << llvm::left_justify("| i", ColWidth);
1342 else
1343 dbgs() << llvm::left_justify("|", ColWidth);
1344 }
1345 dbgs() << "|\n";
1346 const MCSchedClassDesc *SC = getSchedClass(SU);
1348 make_range(SchedModel.getWriteProcResBegin(SC),
1349 SchedModel.getWriteProcResEnd(SC)));
1350
1353 ResourcesIt,
1354 [](const MCWriteProcResEntry &LHS,
1355 const MCWriteProcResEntry &RHS) -> bool {
1356 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1357 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1358 });
1359 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1360 C = FirstCycle;
1361 const std::string ResName =
1362 SchedModel.getResourceName(PI.ProcResourceIdx);
1363 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1364 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1365 dbgs() << llvm::left_justify("|", ColWidth);
1366 }
1367 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1368 ++I, --C)
1369 dbgs() << llvm::left_justify("| x", ColWidth);
1370 while (C-- >= LastCycle)
1371 dbgs() << llvm::left_justify("|", ColWidth);
1372 // Place end char
1373 dbgs() << "| \n";
1374 }
1375 }
1376}
1377#endif
1378
1379#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1380LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1381 if (MISchedDumpScheduleTrace) {
1382 if (DumpDir == DumpDirection::TopDown)
1383 dumpScheduleTraceTopDown();
1384 else if (DumpDir == DumpDirection::BottomUp)
1385 dumpScheduleTraceBottomUp();
1386 else if (DumpDir == DumpDirection::Bidirectional) {
1387 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1388 } else {
1389 dbgs() << "* Schedule table: DumpDirection not set.\n";
1390 }
1391 }
1392
1393 for (MachineInstr &MI : *this) {
1394 if (SUnit *SU = getSUnit(&MI))
1395 dumpNode(*SU);
1396 else
1397 dbgs() << "Missing SUnit\n";
1398 }
1399}
1400#endif
1401
1402//===----------------------------------------------------------------------===//
1403// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1404// preservation.
1405//===----------------------------------------------------------------------===//
1406
1410
1412 const MachineInstr &MI = *SU.getInstr();
1413 for (const MachineOperand &MO : MI.operands()) {
1414 if (!MO.isReg())
1415 continue;
1416 if (!MO.readsReg())
1417 continue;
1418 if (TrackLaneMasks && !MO.isUse())
1419 continue;
1420
1421 Register Reg = MO.getReg();
1422 if (!Reg.isVirtual())
1423 continue;
1424
1425 // Ignore re-defs.
1426 if (TrackLaneMasks) {
1427 bool FoundDef = false;
1428 for (const MachineOperand &MO2 : MI.all_defs()) {
1429 if (MO2.getReg() == Reg && !MO2.isDead()) {
1430 FoundDef = true;
1431 break;
1432 }
1433 }
1434 if (FoundDef)
1435 continue;
1436 }
1437
1438 // Record this local VReg use.
1440 for (; UI != VRegUses.end(); ++UI) {
1441 if (UI->SU == &SU)
1442 break;
1443 }
1444 if (UI == VRegUses.end())
1445 VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1446 }
1447}
1448
1449/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1450/// crossing a scheduling boundary. [begin, end) includes all instructions in
1451/// the region, including the boundary itself and single-instruction regions
1452/// that don't get scheduled.
1456 unsigned regioninstrs)
1457{
1458 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1459 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1460
1461 // For convenience remember the end of the liveness region.
1462 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1463
1464 SUPressureDiffs.clear();
1465
1466 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1467 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1468
1470 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1471}
1472
1473// Setup the register pressure trackers for the top scheduled and bottom
1474// scheduled regions.
1476 VRegUses.clear();
1477 VRegUses.setUniverse(MRI.getNumVirtRegs());
1478 for (SUnit &SU : SUnits)
1479 collectVRegUses(SU);
1480
1482 ShouldTrackLaneMasks, false);
1484 ShouldTrackLaneMasks, false);
1485
1486 // Close the RPTracker to finalize live ins.
1487 RPTracker.closeRegion();
1488
1489 LLVM_DEBUG(RPTracker.dump());
1490
1491 // Initialize the live ins and live outs.
1492 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1493 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1494
1495 // Close one end of the tracker so we can call
1496 // getMaxUpward/DownwardPressureDelta before advancing across any
1497 // instructions. This converts currently live regs into live ins/outs.
1498 TopRPTracker.closeTop();
1499 BotRPTracker.closeBottom();
1500
1501 BotRPTracker.initLiveThru(RPTracker);
1502 if (!BotRPTracker.getLiveThru().empty()) {
1503 TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1504 LLVM_DEBUG(dbgs() << "Live Thru: ";
1505 dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1506 };
1507
1508 // For each live out vreg reduce the pressure change associated with other
1509 // uses of the same vreg below the live-out reaching def.
1510 updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1511
1512 // Account for liveness generated by the region boundary.
1513 if (LiveRegionEnd != RegionEnd) {
1515 BotRPTracker.recede(&LiveUses);
1516 updatePressureDiffs(LiveUses);
1517 }
1518
1519 LLVM_DEBUG(dbgs() << "Top Pressure: ";
1520 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1521 dbgs() << "Bottom Pressure: ";
1522 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1523
1524 assert((BotRPTracker.getPos() == RegionEnd ||
1525 (RegionEnd->isDebugInstr() &&
1527 "Can't find the region bottom");
1528
1529 // Cache the list of excess pressure sets in this region. This will also track
1530 // the max pressure in the scheduled code for these sets.
1531 RegionCriticalPSets.clear();
1532 const std::vector<unsigned> &RegionPressure =
1533 RPTracker.getPressure().MaxSetPressure;
1534 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1535 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1536 if (RegionPressure[i] > Limit) {
1537 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1538 << " Actual " << RegionPressure[i] << "\n");
1539 RegionCriticalPSets.push_back(PressureChange(i));
1540 }
1541 }
1542 LLVM_DEBUG({
1543 if (RegionCriticalPSets.size() > 0) {
1544 dbgs() << "Excess PSets: ";
1545 for (const PressureChange &RCPS : RegionCriticalPSets)
1546 dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1547 dbgs() << "\n";
1548 }
1549 });
1550}
1551
1554 const std::vector<unsigned> &NewMaxPressure) {
1555 const PressureDiff &PDiff = getPressureDiff(SU);
1556 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1557 for (const PressureChange &PC : PDiff) {
1558 if (!PC.isValid())
1559 break;
1560 unsigned ID = PC.getPSet();
1561 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1562 ++CritIdx;
1563 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1564 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1565 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1566 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1567 }
1568 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1569 if (NewMaxPressure[ID] >= Limit - 2) {
1570 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1571 << NewMaxPressure[ID]
1572 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1573 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1574 << " livethru)\n");
1575 }
1576 }
1577}
1578
1579/// Update the PressureDiff array for liveness after scheduling this
1580/// instruction.
1582 for (const VRegMaskOrUnit &P : LiveUses) {
1583 Register Reg = P.RegUnit;
1584 /// FIXME: Currently assuming single-use physregs.
1585 if (!Reg.isVirtual())
1586 continue;
1587
1589 // If the register has just become live then other uses won't change
1590 // this fact anymore => decrement pressure.
1591 // If the register has just become dead then other uses make it come
1592 // back to life => increment pressure.
1593 bool Decrement = P.LaneMask.any();
1594
1595 for (const VReg2SUnit &V2SU
1596 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1597 SUnit &SU = *V2SU.SU;
1598 if (SU.isScheduled || &SU == &ExitSU)
1599 continue;
1600
1601 PressureDiff &PDiff = getPressureDiff(&SU);
1602 PDiff.addPressureChange(Reg, Decrement, &MRI);
1603 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1604 return Change.isValid();
1605 }))
1607 << " UpdateRegPressure: SU(" << SU.NodeNum << ") "
1608 << printReg(Reg, TRI) << ':'
1609 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1610 dbgs() << " to "; PDiff.dump(*TRI););
1611 }
1612 } else {
1613 assert(P.LaneMask.any());
1614 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1615 // This may be called before CurrentBottom has been initialized. However,
1616 // BotRPTracker must have a valid position. We want the value live into the
1617 // instruction or live out of the block, so ask for the previous
1618 // instruction's live-out.
1619 const LiveInterval &LI = LIS->getInterval(Reg);
1620 VNInfo *VNI;
1622 nextIfDebug(BotRPTracker.getPos(), BB->end());
1623 if (I == BB->end())
1624 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1625 else {
1626 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1627 VNI = LRQ.valueIn();
1628 }
1629 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1630 assert(VNI && "No live value at use.");
1631 for (const VReg2SUnit &V2SU
1632 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1633 SUnit *SU = V2SU.SU;
1634 // If this use comes before the reaching def, it cannot be a last use,
1635 // so decrease its pressure change.
1636 if (!SU->isScheduled && SU != &ExitSU) {
1637 LiveQueryResult LRQ =
1638 LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1639 if (LRQ.valueIn() == VNI) {
1640 PressureDiff &PDiff = getPressureDiff(SU);
1641 PDiff.addPressureChange(Reg, true, &MRI);
1642 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1643 return Change.isValid();
1644 }))
1645 LLVM_DEBUG(dbgs() << " UpdateRegPressure: SU(" << SU->NodeNum
1646 << ") " << *SU->getInstr();
1647 dbgs() << " to ";
1648 PDiff.dump(*TRI););
1649 }
1650 }
1651 }
1652 }
1653 }
1654}
1655
1657#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1658 if (EntrySU.getInstr() != nullptr)
1660 for (const SUnit &SU : SUnits) {
1661 dumpNodeAll(SU);
1662 if (ShouldTrackPressure) {
1663 dbgs() << " Pressure Diff : ";
1664 getPressureDiff(&SU).dump(*TRI);
1665 }
1666 dbgs() << " Single Issue : ";
1667 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1668 SchedModel.mustEndGroup(SU.getInstr()))
1669 dbgs() << "true;";
1670 else
1671 dbgs() << "false;";
1672 dbgs() << '\n';
1673 }
1674 if (ExitSU.getInstr() != nullptr)
1676#endif
1677}
1678
1679/// schedule - Called back from MachineScheduler::runOnMachineFunction
1680/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1681/// only includes instructions that have DAG nodes, not scheduling boundaries.
1682///
1683/// This is a skeletal driver, with all the functionality pushed into helpers,
1684/// so that it can be easily extended by experimental schedulers. Generally,
1685/// implementing MachineSchedStrategy should be sufficient to implement a new
1686/// scheduling algorithm. However, if a scheduler further subclasses
1687/// ScheduleDAGMILive then it will want to override this virtual method in order
1688/// to update any specialized state.
1690 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1691 LLVM_DEBUG(SchedImpl->dumpPolicy());
1693
1695
1696 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1697 findRootsAndBiasEdges(TopRoots, BotRoots);
1698
1699 // Initialize the strategy before modifying the DAG.
1700 // This may initialize a DFSResult to be used for queue priority.
1701 SchedImpl->initialize(this);
1702
1703 LLVM_DEBUG(dump());
1704 if (PrintDAGs) dump();
1706
1707 // Initialize ready queues now that the DAG and priority data are finalized.
1708 initQueues(TopRoots, BotRoots);
1709
1710 bool IsTopNode = false;
1711 while (true) {
1712 if (!checkSchedLimit())
1713 break;
1714
1715 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1716 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1717 if (!SU) break;
1718
1719 assert(!SU->isScheduled && "Node already scheduled");
1720
1721 scheduleMI(SU, IsTopNode);
1722
1723 if (DFSResult) {
1724 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1725 if (!ScheduledTrees.test(SubtreeID)) {
1726 ScheduledTrees.set(SubtreeID);
1727 DFSResult->scheduleTree(SubtreeID);
1728 SchedImpl->scheduleTree(SubtreeID);
1729 }
1730 }
1731
1732 // Notify the scheduling strategy after updating the DAG.
1733 SchedImpl->schedNode(SU, IsTopNode);
1734
1735 updateQueues(SU, IsTopNode);
1736 }
1737 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1738
1740
1741 LLVM_DEBUG({
1742 dbgs() << "*** Final schedule for "
1743 << printMBBReference(*begin()->getParent()) << " ***\n";
1744 dumpSchedule();
1745 dbgs() << '\n';
1746 });
1747}
1748
1749/// Build the DAG and setup three register pressure trackers.
1751 if (!ShouldTrackPressure) {
1752 RPTracker.reset();
1753 RegionCriticalPSets.clear();
1755 return;
1756 }
1757
1758 // Initialize the register pressure tracker used by buildSchedGraph.
1760 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1761
1762 // Account for liveness generated by the region boundary.
1763 if (LiveRegionEnd != RegionEnd)
1764 RPTracker.recede();
1765
1766 // Build the DAG, and compute current register pressure.
1768
1769 // Initialize top/bottom trackers after computing region pressure.
1771}
1772
1774 if (!DFSResult)
1775 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1776 DFSResult->clear();
1777 ScheduledTrees.clear();
1778 DFSResult->resize(SUnits.size());
1779 DFSResult->compute(SUnits);
1780 ScheduledTrees.resize(DFSResult->getNumSubtrees());
1781}
1782
1783/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1784/// only provides the critical path for single block loops. To handle loops that
1785/// span blocks, we could use the vreg path latencies provided by
1786/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1787/// available for use in the scheduler.
1788///
1789/// The cyclic path estimation identifies a def-use pair that crosses the back
1790/// edge and considers the depth and height of the nodes. For example, consider
1791/// the following instruction sequence where each instruction has unit latency
1792/// and defines an eponymous virtual register:
1793///
1794/// a->b(a,c)->c(b)->d(c)->exit
1795///
1796/// The cyclic critical path is two cycles: b->c->b
1797/// The acyclic critical path is four cycles: a->b->c->d->exit
1798/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1799/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1800/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1801/// LiveInDepth = depth(b) = len(a->b) = 1
1802///
1803/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1804/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1805/// CyclicCriticalPath = min(2, 2) = 2
1806///
1807/// This could be relevant to PostRA scheduling, but is currently implemented
1808/// assuming LiveIntervals.
1810 // This only applies to single-block loops.
1811 if (!BB->isSuccessor(BB))
1812 return 0;
1813
1814 unsigned MaxCyclicLatency = 0;
1815 // Visit each live out vreg def to find def/use pairs that cross iterations.
1816 for (const VRegMaskOrUnit &P : RPTracker.getPressure().LiveOutRegs) {
1817 Register Reg = P.RegUnit;
1818 if (!Reg.isVirtual())
1819 continue;
1820 const LiveInterval &LI = LIS->getInterval(Reg);
1821 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1822 if (!DefVNI)
1823 continue;
1824
1825 MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1826 const SUnit *DefSU = getSUnit(DefMI);
1827 if (!DefSU)
1828 continue;
1829
1830 unsigned LiveOutHeight = DefSU->getHeight();
1831 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1832 // Visit all local users of the vreg def.
1833 for (const VReg2SUnit &V2SU
1834 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1835 SUnit *SU = V2SU.SU;
1836 if (SU == &ExitSU)
1837 continue;
1838
1839 // Only consider uses of the phi.
1840 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1841 if (!LRQ.valueIn()->isPHIDef())
1842 continue;
1843
1844 // Assume that a path spanning two iterations is a cycle, which could
1845 // overestimate in strange cases. This allows cyclic latency to be
1846 // estimated as the minimum slack of the vreg's depth or height.
1847 unsigned CyclicLatency = 0;
1848 if (LiveOutDepth > SU->getDepth())
1849 CyclicLatency = LiveOutDepth - SU->getDepth();
1850
1851 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1852 if (LiveInHeight > LiveOutHeight) {
1853 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1854 CyclicLatency = LiveInHeight - LiveOutHeight;
1855 } else
1856 CyclicLatency = 0;
1857
1858 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1859 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1860 if (CyclicLatency > MaxCyclicLatency)
1861 MaxCyclicLatency = CyclicLatency;
1862 }
1863 }
1864 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1865 return MaxCyclicLatency;
1866}
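//
// For illustration, plugging the example from the comment above into the loop
// body (made-up SUnit numbers): the live-out def c has Latency 1, depth 2 and
// height 2, and its PHI use b has depth 1 and height 3, so
//   LiveOutDepth  = 2 + 1 = 3,   LiveInHeight = 3 + 1 = 4,
//   CyclicLatency = min(3 - 1, 4 - 2) = 2,
// which matches the CyclicCriticalPath of 2 derived by hand in the comment.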
1867
1868/// Release ExitSU predecessors and setup scheduler queues. Re-position
1869/// the Top RP tracker in case the region beginning has changed.
1870void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1871                                   ArrayRef<SUnit*> BotRoots) {
1872 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1873 if (ShouldTrackPressure) {
1874 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1875 TopRPTracker.setPos(CurrentTop);
1876 }
1877}
1878
1879/// Move an instruction and update register pressure.
1880void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1881 // Move the instruction to its new location in the instruction stream.
1882 MachineInstr *MI = SU->getInstr();
1883
1884 if (IsTopNode) {
1885 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1886    if (&*CurrentTop == MI)
1887      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1888    else {
1889      moveInstruction(MI, CurrentTop);
1890      TopRPTracker.setPos(MI);
1891    }
1892
1893 if (ShouldTrackPressure) {
1894 // Update top scheduled pressure.
1895 RegisterOperands RegOpers;
1896 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1897                       /*IgnoreDead=*/false);
1898      if (ShouldTrackLaneMasks) {
1899        // Adjust liveness and add missing dead+read-undef flags.
1900 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1901 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1902 } else {
1903 // Adjust for missing dead-def flags.
1904 RegOpers.detectDeadDefs(*MI, *LIS);
1905 }
1906
1907 TopRPTracker.advance(RegOpers);
1908 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1909 LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
1910 TopRPTracker.getRegSetPressureAtPos(), TRI););
1911
1912 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1913 }
1914 } else {
1915    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1916    MachineBasicBlock::iterator priorII =
1917        priorNonDebug(CurrentBottom, CurrentTop);
1918    if (&*priorII == MI)
1919 CurrentBottom = priorII;
1920 else {
1921 if (&*CurrentTop == MI) {
1922 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1923 TopRPTracker.setPos(CurrentTop);
1924      }
1925      moveInstruction(MI, CurrentBottom);
1926      CurrentBottom = MI;
1927      BotRPTracker.setPos(CurrentBottom);
1928    }
1929 if (ShouldTrackPressure) {
1930 RegisterOperands RegOpers;
1931 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1932                       /*IgnoreDead=*/false);
1933      if (ShouldTrackLaneMasks) {
1934        // Adjust liveness and add missing dead+read-undef flags.
1935 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1936 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1937 } else {
1938 // Adjust for missing dead-def flags.
1939 RegOpers.detectDeadDefs(*MI, *LIS);
1940 }
1941
1942 if (BotRPTracker.getPos() != CurrentBottom)
1943        BotRPTracker.recedeSkipDebugValues();
1944      SmallVector<VRegMaskOrUnit, 8> LiveUses;
1945      BotRPTracker.recede(RegOpers, &LiveUses);
1946 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1947 LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
1948 BotRPTracker.getRegSetPressureAtPos(), TRI););
1949
1950 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1951 updatePressureDiffs(LiveUses);
1952 }
1953 }
1954}
1955
1956//===----------------------------------------------------------------------===//
1957// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1958//===----------------------------------------------------------------------===//
1959
1960namespace {
1961
1962/// Post-process the DAG to create cluster edges between neighboring
1963/// loads or between neighboring stores.
1964class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1965 struct MemOpInfo {
1966 SUnit *SU;
1967    SmallVector<const MachineOperand *, 4> BaseOps;
1968    int64_t Offset;
1969 LocationSize Width;
1970 bool OffsetIsScalable;
1971
1972 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1973 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1974 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1975 OffsetIsScalable(OffsetIsScalable) {}
1976
1977 static bool Compare(const MachineOperand *const &A,
1978 const MachineOperand *const &B) {
1979 if (A->getType() != B->getType())
1980 return A->getType() < B->getType();
1981 if (A->isReg())
1982 return A->getReg() < B->getReg();
1983 if (A->isFI()) {
1984 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1985        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1986        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1987                              TargetFrameLowering::StackGrowsDown;
1988        return StackGrowsDown ? A->getIndex() > B->getIndex()
1989 : A->getIndex() < B->getIndex();
1990 }
1991
1992 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1993 "index bases.");
1994 }
1995
1996 bool operator<(const MemOpInfo &RHS) const {
1997 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1998 // comparison instead when it's available.
1999 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
2000 RHS.BaseOps.begin(), RHS.BaseOps.end(),
2001 Compare))
2002 return true;
2003 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
2004 BaseOps.begin(), BaseOps.end(), Compare))
2005 return false;
2006 if (Offset != RHS.Offset)
2007 return Offset < RHS.Offset;
2008 return SU->NodeNum < RHS.SU->NodeNum;
2009 }
2010 };
2011
2012 const TargetInstrInfo *TII;
2013 const TargetRegisterInfo *TRI;
2014 bool IsLoad;
2015 bool ReorderWhileClustering;
2016
2017public:
2018 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
2019 const TargetRegisterInfo *tri, bool IsLoad,
2020 bool ReorderWhileClustering)
2021 : TII(tii), TRI(tri), IsLoad(IsLoad),
2022 ReorderWhileClustering(ReorderWhileClustering) {}
2023
2024 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2025
2026protected:
2027 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
2028 ScheduleDAGInstrs *DAG);
2029 void collectMemOpRecords(std::vector<SUnit> &SUnits,
2030 SmallVectorImpl<MemOpInfo> &MemOpRecords);
2031 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2032 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
2033};
2034
2035class StoreClusterMutation : public BaseMemOpClusterMutation {
2036public:
2037 StoreClusterMutation(const TargetInstrInfo *tii,
2038 const TargetRegisterInfo *tri,
2039 bool ReorderWhileClustering)
2040 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
2041};
2042
2043class LoadClusterMutation : public BaseMemOpClusterMutation {
2044public:
2045 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
2046 bool ReorderWhileClustering)
2047 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
2048};
2049
2050} // end anonymous namespace
2051
2052std::unique_ptr<ScheduleDAGMutation>
2053llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII,
2054                                   const TargetRegisterInfo *TRI,
2055 bool ReorderWhileClustering) {
2056 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
2057 TII, TRI, ReorderWhileClustering)
2058 : nullptr;
2059}
2060
2061std::unique_ptr<ScheduleDAGMutation>
2062llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII,
2063                                    const TargetRegisterInfo *TRI,
2064 bool ReorderWhileClustering) {
2065 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
2066 TII, TRI, ReorderWhileClustering)
2067 : nullptr;
2068}
2069
2070// Sort all the loads/stores first. Then, for each load/store, check the
2071// following loads/stores one by one until reaching the first non-dependent
2072// one, and call the target hook to see if they can be clustered.
2073// If FastCluster is enabled, we assume that all the loads/stores have been
2074// preprocessed and no longer have dependencies on each other.
2075void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2076 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
2077 ScheduleDAGInstrs *DAG) {
2078 // Keep track of the current cluster length and bytes for each SUnit.
2079  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
2080  EquivalenceClasses<SUnit *> Clusters;
2081
2082  // At this point, the `MemOpRecords` array must hold at least two mem ops.
2083  // Try to cluster the mem ops collected within the `MemOpRecords` array.
2084 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
2085    // The decision to cluster mem ops is taken based on target-dependent logic.
2086 auto MemOpa = MemOpRecords[Idx];
2087
2088 // Seek for the next load/store to do the cluster.
2089 unsigned NextIdx = Idx + 1;
2090 for (; NextIdx < End; ++NextIdx)
2091      // Skip if MemOpb has been clustered already or has a dependency with
2092      // MemOpa.
2093 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
2094 (FastCluster ||
2095 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
2096 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
2097 break;
2098 if (NextIdx == End)
2099 continue;
2100
2101 auto MemOpb = MemOpRecords[NextIdx];
2102 unsigned ClusterLength = 2;
2103 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
2104 MemOpb.Width.getValue().getKnownMinValue();
2105 auto It = SUnit2ClusterInfo.find(MemOpa.SU->NodeNum);
2106 if (It != SUnit2ClusterInfo.end()) {
2107 const auto &[Len, Bytes] = It->second;
2108 ClusterLength = Len + 1;
2109 CurrentClusterBytes = Bytes + MemOpb.Width.getValue().getKnownMinValue();
2110 }
2111
2112 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
2113 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
2114 MemOpb.Offset, MemOpb.OffsetIsScalable,
2115 ClusterLength, CurrentClusterBytes))
2116 continue;
2117
2118 SUnit *SUa = MemOpa.SU;
2119 SUnit *SUb = MemOpb.SU;
2120
2121 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
2122 std::swap(SUa, SUb);
2123
2124 // FIXME: Is this check really required?
2125 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
2126 continue;
2127
2128 Clusters.unionSets(SUa, SUb);
2129 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
2130 << SUb->NodeNum << ")\n");
2131 ++NumClustered;
2132
2133 if (IsLoad) {
2134 // Copy successor edges from SUa to SUb. Interleaving computation
2135 // dependent on SUa can prevent load combining due to register reuse.
2136 // Predecessor edges do not need to be copied from SUb to SUa since
2137 // nearby loads should have effectively the same inputs.
2138 for (const SDep &Succ : SUa->Succs) {
2139 if (Succ.getSUnit() == SUb)
2140 continue;
2141 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
2142 << ")\n");
2143 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
2144 }
2145 } else {
2146      // Copy predecessor edges from SUb to SUa to avoid the SUnits that
2147      // SUb depends on being scheduled in between SUb and SUa. Successor
2148      // edges do not need to be copied from SUa to SUb since no one will
2149      // depend on stores.
2150      // Note that we don't need to care about memory dependencies: we
2151      // won't try to cluster mem ops that have a memory dependency.
2152 for (const SDep &Pred : SUb->Preds) {
2153 if (Pred.getSUnit() == SUa)
2154 continue;
2155 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
2156 << ")\n");
2157 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
2158 }
2159 }
2160
2161 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
2162 CurrentClusterBytes};
2163
2164 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
2165 << ", Curr cluster bytes: " << CurrentClusterBytes
2166 << "\n");
2167 }
2168
2169 // Add cluster group information.
2170 // Iterate over all of the equivalence sets.
2171 auto &AllClusters = DAG->getClusters();
2172 for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) {
2173 if (!I->isLeader())
2174 continue;
2175 ClusterInfo Group;
2176 unsigned ClusterIdx = AllClusters.size();
2177 for (SUnit *MemberI : Clusters.members(*I)) {
2178 MemberI->ParentClusterIdx = ClusterIdx;
2179 Group.insert(MemberI);
2180 }
2181 AllClusters.push_back(Group);
2182 }
2183}
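//
// For illustration (made-up widths): clustering three 8-byte loads with the
// same base register walks the loop above as follows, with every step still
// subject to TII->shouldClusterMemOps:
//   (MemOpa, MemOpb) = (L0, L1): ClusterLength = 2, CurrentClusterBytes = 16
//   (MemOpa, MemOpb) = (L1, L2): L1 already has a SUnit2ClusterInfo entry of
//                                {2, 16}, so ClusterLength = 3 and
//                                CurrentClusterBytes = 16 + 8 = 24.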
2184
2185void BaseMemOpClusterMutation::collectMemOpRecords(
2186 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
2187 for (auto &SU : SUnits) {
2188 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
2189 (!IsLoad && !SU.getInstr()->mayStore()))
2190 continue;
2191
2192 const MachineInstr &MI = *SU.getInstr();
2193    SmallVector<const MachineOperand *, 4> BaseOps;
2194    int64_t Offset;
2195    bool OffsetIsScalable;
2196    LocationSize Width = LocationSize::precise(0);
2197    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
2198                                           OffsetIsScalable, Width, TRI)) {
2199 if (!Width.hasValue())
2200 continue;
2201
2202 MemOpRecords.push_back(
2203 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
2204
2205 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
2206 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
2207 << ", Width: " << Width << "\n");
2208 }
2209#ifndef NDEBUG
2210 for (const auto *Op : BaseOps)
2211 assert(Op);
2212#endif
2213 }
2214}
2215
2216bool BaseMemOpClusterMutation::groupMemOps(
2217    ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2218    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
2219  bool FastCluster =
2220      ForceFastCluster ||
2221      MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
2222
2223 for (const auto &MemOp : MemOps) {
2224 unsigned ChainPredID = DAG->SUnits.size();
2225 if (FastCluster) {
2226 for (const SDep &Pred : MemOp.SU->Preds) {
2227        // We only want to cluster mem ops that have the same ctrl (non-data)
2228        // pred, so that they don't have a ctrl dependency on each other. But
2229        // for store instrs, we can still cluster them if the pred is a load.
2230 if ((Pred.isCtrl() &&
2231 (IsLoad ||
2232 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
2233 !Pred.isArtificial()) {
2234 ChainPredID = Pred.getSUnit()->NodeNum;
2235 break;
2236 }
2237 }
2238 } else
2239 ChainPredID = 0;
2240
2241 Groups[ChainPredID].push_back(MemOp);
2242 }
2243 return FastCluster;
2244}
2245
2246/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2247void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2248 // Collect all the clusterable loads/stores
2249 SmallVector<MemOpInfo, 32> MemOpRecords;
2250 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2251
2252 if (MemOpRecords.size() < 2)
2253 return;
2254
2255  // If the DAG is too complex, use a heuristic to put the loads/stores
2256  // without dependencies into the same group, to avoid a compile-time blow
2257  // up. Note that some fusion pairs may be lost as a result.
2258  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2259  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2260
2261 for (auto &Group : Groups) {
2262    // Sort the loads/stores so that we can stop clustering as early as
2263    // possible.
2264 llvm::sort(Group.second);
2265
2266    // Try to cluster all the neighboring loads/stores.
2267 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2268 }
2269}
2270
2271//===----------------------------------------------------------------------===//
2272// CopyConstrain - DAG post-processing to encourage copy elimination.
2273//===----------------------------------------------------------------------===//
2274
2275namespace {
2276
2277/// Post-process the DAG to create weak edges from all uses of a copy to
2278/// the one use that defines the copy's source vreg, most likely an induction
2279/// variable increment.
2280class CopyConstrain : public ScheduleDAGMutation {
2281 // Transient state.
2282 SlotIndex RegionBeginIdx;
2283
2284 // RegionEndIdx is the slot index of the last non-debug instruction in the
2285 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2286 SlotIndex RegionEndIdx;
2287
2288public:
2289 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2290
2291 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2292
2293protected:
2294 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2295};
2296
2297} // end anonymous namespace
2298
2299std::unique_ptr<ScheduleDAGMutation>
2300llvm::createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2301                                     const TargetRegisterInfo *TRI) {
2302 return std::make_unique<CopyConstrain>(TII, TRI);
2303}
2304
2305/// constrainLocalCopy handles two possibilities:
2306/// 1) Local src:
2307/// I0: = dst
2308/// I1: src = ...
2309/// I2: = dst
2310/// I3: dst = src (copy)
2311/// (create pred->succ edges I0->I1, I2->I1)
2312///
2313/// 2) Local copy:
2314/// I0: dst = src (copy)
2315/// I1: = dst
2316/// I2: src = ...
2317/// I3: = dst
2318/// (create pred->succ edges I1->I2, I3->I2)
2319///
2320/// Although the MachineScheduler is currently constrained to single blocks,
2321/// this algorithm should handle extended blocks. An EBB is a set of
2322/// contiguously numbered blocks such that the previous block in the EBB is
2323/// always the single predecessor.
2324void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2325 LiveIntervals *LIS = DAG->getLIS();
2326 MachineInstr *Copy = CopySU->getInstr();
2327
2328 // Check for pure vreg copies.
2329 const MachineOperand &SrcOp = Copy->getOperand(1);
2330 Register SrcReg = SrcOp.getReg();
2331 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2332 return;
2333
2334 const MachineOperand &DstOp = Copy->getOperand(0);
2335 Register DstReg = DstOp.getReg();
2336 if (!DstReg.isVirtual() || DstOp.isDead())
2337 return;
2338
2339 // Check if either the dest or source is local. If it's live across a back
2340 // edge, it's not local. Note that if both vregs are live across the back
2341  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2342 // If both the copy's source and dest are local live intervals, then we
2343 // should treat the dest as the global for the purpose of adding
2344 // constraints. This adds edges from source's other uses to the copy.
2345 unsigned LocalReg = SrcReg;
2346 unsigned GlobalReg = DstReg;
2347 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2348 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2349 LocalReg = DstReg;
2350 GlobalReg = SrcReg;
2351 LocalLI = &LIS->getInterval(LocalReg);
2352 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2353 return;
2354 }
2355 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2356
2357 // Find the global segment after the start of the local LI.
2358 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2359 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2360 // local live range. We could create edges from other global uses to the local
2361 // start, but the coalescer should have already eliminated these cases, so
2362 // don't bother dealing with it.
2363 if (GlobalSegment == GlobalLI->end())
2364 return;
2365
2366 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2367 // returned the next global segment. But if GlobalSegment overlaps with
2368 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2369 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2370 if (GlobalSegment->contains(LocalLI->beginIndex()))
2371 ++GlobalSegment;
2372
2373 if (GlobalSegment == GlobalLI->end())
2374 return;
2375
2376 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2377 if (GlobalSegment != GlobalLI->begin()) {
2378 // Two address defs have no hole.
2379 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2380 GlobalSegment->start)) {
2381 return;
2382 }
2383    // If the prior global segment may be defined by the same two-address
2384    // instruction that also defines LocalLI, then we cannot make a hole here.
2385 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2386 LocalLI->beginIndex())) {
2387 return;
2388 }
2389 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2390 // it would be a disconnected component in the live range.
2391 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2392 "Disconnected LRG within the scheduling region.");
2393 }
2394 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2395 if (!GlobalDef)
2396 return;
2397
2398 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2399 if (!GlobalSU)
2400 return;
2401
2402 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2403 // constraining the uses of the last local def to precede GlobalDef.
2404 SmallVector<SUnit*,8> LocalUses;
2405 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2406 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2407 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2408 for (const SDep &Succ : LastLocalSU->Succs) {
2409 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2410 continue;
2411 if (Succ.getSUnit() == GlobalSU)
2412 continue;
2413 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2414 return;
2415 LocalUses.push_back(Succ.getSUnit());
2416 }
2417 // Open the top of the GlobalLI hole by constraining any earlier global uses
2418 // to precede the start of LocalLI.
2419 SmallVector<SUnit*,8> GlobalUses;
2420 MachineInstr *FirstLocalDef =
2421 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2422 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2423 for (const SDep &Pred : GlobalSU->Preds) {
2424 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2425 continue;
2426 if (Pred.getSUnit() == FirstLocalSU)
2427 continue;
2428 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2429 return;
2430 GlobalUses.push_back(Pred.getSUnit());
2431 }
2432 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2433 // Add the weak edges.
2434 for (SUnit *LU : LocalUses) {
2435 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2436 << GlobalSU->NodeNum << ")\n");
2437 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2438 }
2439 for (SUnit *GU : GlobalUses) {
2440 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2441 << FirstLocalSU->NodeNum << ")\n");
2442 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2443 }
2444}
2445
2446/// Callback from DAG postProcessing to create weak edges to encourage
2447/// copy elimination.
2448void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2449 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2450 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2451
2452 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2453 if (FirstPos == DAG->end())
2454 return;
2455 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2456 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2457 *priorNonDebug(DAG->end(), DAG->begin()));
2458
2459 for (SUnit &SU : DAG->SUnits) {
2460 if (!SU.getInstr()->isCopy())
2461 continue;
2462
2463 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2464 }
2465}
2466
2467//===----------------------------------------------------------------------===//
2468// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2469// and possibly other custom schedulers.
2470//===----------------------------------------------------------------------===//
2471
2472static const unsigned InvalidCycle = ~0U;
2473
2474SchedBoundary::~SchedBoundary() { delete HazardRec; }
2475
2476/// Given a Count of resource usage and a Latency value, return true if a
2477/// SchedBoundary becomes resource limited.
2478/// If we are checking after scheduling a node, we should return true when
2479/// we just reach the resource limit.
2480static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2481 unsigned Latency, bool AfterSchedNode) {
2482 int ResCntFactor = (int)(Count - (Latency * LFactor));
2483 if (AfterSchedNode)
2484 return ResCntFactor >= (int)LFactor;
2485 else
2486 return ResCntFactor > (int)LFactor;
2487}
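//
// A worked example with made-up numbers, assuming a latency factor of 2:
//   checkResourceLimit(/*LFactor=*/2, /*Count=*/7,  /*Latency=*/3, true)
//     ResCntFactor = 7 - 3 * 2 = 1,  1 >= 2 is false: not resource limited.
//   checkResourceLimit(/*LFactor=*/2, /*Count=*/12, /*Latency=*/3, true)
//     ResCntFactor = 12 - 3 * 2 = 6, 6 >= 2 is true: resource limited.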
2488
2489void SchedBoundary::reset() {
2490  // A new HazardRec is created for each DAG and owned by SchedBoundary.
2491 // Destroying and reconstructing it is very expensive though. So keep
2492 // invalid, placeholder HazardRecs.
2493 if (HazardRec && HazardRec->isEnabled()) {
2494 delete HazardRec;
2495 HazardRec = nullptr;
2496 }
2497 Available.clear();
2498 Pending.clear();
2499 CheckPending = false;
2500 CurrCycle = 0;
2501 CurrMOps = 0;
2502 MinReadyCycle = std::numeric_limits<unsigned>::max();
2503 ExpectedLatency = 0;
2504 DependentLatency = 0;
2505 RetiredMOps = 0;
2506 MaxExecutedResCount = 0;
2507 ZoneCritResIdx = 0;
2508 IsResourceLimited = false;
2509 ReservedCycles.clear();
2510 ReservedResourceSegments.clear();
2511 ReservedCyclesIndex.clear();
2512 ResourceGroupSubUnitMasks.clear();
2513#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2514 // Track the maximum number of stall cycles that could arise either from the
2515 // latency of a DAG edge or the number of cycles that a processor resource is
2516 // reserved (SchedBoundary::ReservedCycles).
2517 MaxObservedStall = 0;
2518#endif
2519 // Reserve a zero-count for invalid CritResIdx.
2520 ExecutedResCounts.resize(1);
2521 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2522}
2523
2524void SchedRemainder::
2525init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2526 reset();
2527 if (!SchedModel->hasInstrSchedModel())
2528 return;
2529 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2530 for (SUnit &SU : DAG->SUnits) {
2531 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2532 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2533 * SchedModel->getMicroOpFactor();
2534    for (TargetSchedModel::ProcResIter
2535           PI = SchedModel->getWriteProcResBegin(SC),
2536 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2537 unsigned PIdx = PI->ProcResourceIdx;
2538 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2539 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2540 RemainingCounts[PIdx] +=
2541 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2542 }
2543 }
2544}
2545
2546void SchedBoundary::
2547init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2548 reset();
2549 DAG = dag;
2550 SchedModel = smodel;
2551 Rem = rem;
2552 if (SchedModel->hasInstrSchedModel()) {
2553 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2554 ReservedCyclesIndex.resize(ResourceCount);
2555 ExecutedResCounts.resize(ResourceCount);
2556 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2557 unsigned NumUnits = 0;
2558
2559 for (unsigned i = 0; i < ResourceCount; ++i) {
2560 ReservedCyclesIndex[i] = NumUnits;
2561 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2562 if (isUnbufferedGroup(i)) {
2563 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2564 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2565 U != UE; ++U)
2566 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2567 }
2568 }
2569
2570 ReservedCycles.resize(NumUnits, InvalidCycle);
2571 }
2572}
2573
2574/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2575/// these "soft stalls" differently than the hard stall cycles based on CPU
2576/// resources and computed by checkHazard(). A fully in-order model
2577/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2578/// available for scheduling until they are ready. However, a weaker in-order
2579/// model may use this for heuristics. For example, if a processor has in-order
2580/// behavior when reading certain resources, this may come into play.
2581unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2582  if (!SU->isUnbuffered)
2583 return 0;
2584
2585 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2586 if (ReadyCycle > CurrCycle)
2587 return ReadyCycle - CurrCycle;
2588 return 0;
2589}
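//
// For example (made-up cycles): an unbuffered SU with BotReadyCycle = 7 seen
// by the bottom zone at CurrCycle = 5 reports 2 soft stall cycles, while any
// buffered SU reports 0.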
2590
2591/// Compute the next cycle at which the given processor resource unit
2592/// can be scheduled.
2593unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2594                                                       unsigned ReleaseAtCycle,
2595 unsigned AcquireAtCycle) {
2596 if (SchedModel && SchedModel->enableIntervals()) {
2597 if (isTop())
2598 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2599 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2600
2601 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2602 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2603 }
2604
2605 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2606 // If this resource has never been used, always return cycle zero.
2607 if (NextUnreserved == InvalidCycle)
2608 return CurrCycle;
2609 // For bottom-up scheduling add the cycles needed for the current operation.
2610 if (!isTop())
2611 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2612 return NextUnreserved;
2613}
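//
// For illustration (made-up state, interval tracking disabled): if
// ReservedCycles[InstanceIdx] == 3, a bottom-up query with CurrCycle == 4 and
// ReleaseAtCycle == 2 returns max(4, 3 + 2) = 5, a top-down query returns the
// recorded cycle 3, and an instance still at InvalidCycle returns CurrCycle.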
2614
2615/// Compute the next cycle at which the given processor resource can be
2616/// scheduled. Returns the next cycle and the index of the processor resource
2617/// instance in the reserved cycles vector.
2618std::pair<unsigned, unsigned>
2619SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2620                                    unsigned ReleaseAtCycle,
2621                                    unsigned AcquireAtCycle) {
2622  if (MischedDetailResourceBooking) {
2623    LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");
2624    LLVM_DEBUG(dumpReservedCycles());
2625    LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");
2626  }
2627 unsigned MinNextUnreserved = InvalidCycle;
2628 unsigned InstanceIdx = 0;
2629 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2630 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2631 assert(NumberOfInstances > 0 &&
2632 "Cannot have zero instances of a ProcResource");
2633
2634 if (isUnbufferedGroup(PIdx)) {
2635 // If any subunits are used by the instruction, report that the
2636 // subunits of the resource group are available at the first cycle
2637 // in which the unit is available, effectively removing the group
2638 // record from hazarding and basing the hazarding decisions on the
2639 // subunit records. Otherwise, choose the first available instance
2640 // from among the subunits. Specifications which assign cycles to
2641 // both the subunits and the group or which use an unbuffered
2642 // group with buffered subunits will appear to schedule
2643 // strangely. In the first case, the additional cycles for the
2644 // group will be ignored. In the second, the group will be
2645 // ignored entirely.
2646 for (const MCWriteProcResEntry &PE :
2647 make_range(SchedModel->getWriteProcResBegin(SC),
2648 SchedModel->getWriteProcResEnd(SC)))
2649 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2650 return std::make_pair(getNextResourceCycleByInstance(
2651 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2652 StartIndex);
2653
2654 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2655 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2656 unsigned NextUnreserved, NextInstanceIdx;
2657 std::tie(NextUnreserved, NextInstanceIdx) =
2658 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2659 if (MinNextUnreserved > NextUnreserved) {
2660 InstanceIdx = NextInstanceIdx;
2661 MinNextUnreserved = NextUnreserved;
2662 }
2663 }
2664 return std::make_pair(MinNextUnreserved, InstanceIdx);
2665 }
2666
2667 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2668 ++I) {
2669 unsigned NextUnreserved =
2670 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2672 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2673 << NextUnreserved << "c\n");
2674 if (MinNextUnreserved > NextUnreserved) {
2675 InstanceIdx = I;
2676 MinNextUnreserved = NextUnreserved;
2677 }
2678 }
2680 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2681 << "[" << InstanceIdx - StartIndex << "]"
2682 << " available @" << MinNextUnreserved << "c"
2683 << "\n");
2684 return std::make_pair(MinNextUnreserved, InstanceIdx);
2685}
2686
2687/// Does this SU have a hazard within the current instruction group.
2688///
2689/// The scheduler supports two modes of hazard recognition. The first is the
2690/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2691/// supports highly complicated in-order reservation tables
2692/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2693///
2694/// The second is a streamlined mechanism that checks for hazards based on
2695/// simple counters that the scheduler itself maintains. It explicitly checks
2696/// for instruction dispatch limitations, including the number of micro-ops that
2697/// can dispatch per cycle.
2698///
2699/// TODO: Also check whether the SU must start a new group.
2700bool SchedBoundary::checkHazard(SUnit *SU) {
2701  if (HazardRec->isEnabled()
2702 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2703    LLVM_DEBUG(dbgs().indent(2)
2704               << "hazard: SU(" << SU->NodeNum << ") reported by HazardRec\n");
2705 return true;
2706 }
2707
2708 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2709 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2710 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") uops="
2711 << uops << ", CurrMOps = " << CurrMOps << ", "
2712 << "CurrMOps + uops > issue width of "
2713 << SchedModel->getIssueWidth() << "\n");
2714 return true;
2715 }
2716
2717 if (CurrMOps > 0 &&
2718 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2719 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2720 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") must "
2721 << (isTop() ? "begin" : "end") << " group\n");
2722 return true;
2723 }
2724
2725 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2726 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2727 for (const MCWriteProcResEntry &PE :
2728 make_range(SchedModel->getWriteProcResBegin(SC),
2729 SchedModel->getWriteProcResEnd(SC))) {
2730 unsigned ResIdx = PE.ProcResourceIdx;
2731 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2732 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2733 unsigned NRCycle, InstanceIdx;
2734 std::tie(NRCycle, InstanceIdx) =
2735 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2736 if (NRCycle > CurrCycle) {
2737#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2738 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2739#endif
2740        LLVM_DEBUG(dbgs().indent(2)
2741                   << "hazard: SU(" << SU->NodeNum << ") "
2742 << SchedModel->getResourceName(ResIdx) << '['
2743 << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' << "="
2744 << NRCycle << "c, is later than "
2745 << "CurrCycle = " << CurrCycle << "c\n");
2746 return true;
2747 }
2748 }
2749 }
2750 return false;
2751}
2752
2753// Find the unscheduled node in ReadySUs with the highest latency.
2754unsigned SchedBoundary::
2755findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2756  SUnit *LateSU = nullptr;
2757 unsigned RemLatency = 0;
2758 for (SUnit *SU : ReadySUs) {
2759 unsigned L = getUnscheduledLatency(SU);
2760 if (L > RemLatency) {
2761 RemLatency = L;
2762 LateSU = SU;
2763 }
2764 }
2765 if (LateSU) {
2766 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2767 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2768 }
2769 return RemLatency;
2770}
2771
2772// Count resources in this zone and the remaining unscheduled
2773// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2774// resource index, or zero if the zone is issue limited.
2775unsigned SchedBoundary::
2776getOtherResourceCount(unsigned &OtherCritIdx) {
2777 OtherCritIdx = 0;
2778 if (!SchedModel->hasInstrSchedModel())
2779 return 0;
2780
2781 unsigned OtherCritCount = Rem->RemIssueCount
2782 + (RetiredMOps * SchedModel->getMicroOpFactor());
2783 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2784 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2785 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2786 PIdx != PEnd; ++PIdx) {
2787 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2788 if (OtherCount > OtherCritCount) {
2789 OtherCritCount = OtherCount;
2790 OtherCritIdx = PIdx;
2791 }
2792 }
2793 if (OtherCritIdx) {
2794 LLVM_DEBUG(
2795 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2796 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2797 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2798 }
2799 return OtherCritCount;
2800}
2801
2802void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2803 unsigned Idx) {
2804 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2805
2806#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2807  // ReadyCycle was bumped up to the CurrCycle when this node was
2808 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2809 // scheduling, so may now be greater than ReadyCycle.
2810 if (ReadyCycle > CurrCycle)
2811 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2812#endif
2813
2814 if (ReadyCycle < MinReadyCycle)
2815 MinReadyCycle = ReadyCycle;
2816
2817 // Check for interlocks first. For the purpose of other heuristics, an
2818 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2819 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2820 bool HazardDetected = !IsBuffered && ReadyCycle > CurrCycle;
2821 if (HazardDetected)
2822 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum
2823 << ") ReadyCycle = " << ReadyCycle
2824 << " is later than CurrCycle = " << CurrCycle
2825 << " on an unbuffered resource" << "\n");
2826 else
2827 HazardDetected = checkHazard(SU);
2828
2829 if (!HazardDetected && Available.size() >= ReadyListLimit) {
2830 HazardDetected = true;
2831 LLVM_DEBUG(dbgs().indent(2) << "hazard: Available Q is full (size: "
2832 << Available.size() << ")\n");
2833 }
2834
2835 if (!HazardDetected) {
2836 Available.push(SU);
2837    LLVM_DEBUG(dbgs().indent(2)
2838                << "Move SU(" << SU->NodeNum << ") into Available Q\n");
2839
2840 if (InPQueue)
2841 Pending.remove(Pending.begin() + Idx);
2842 return;
2843 }
2844
2845 if (!InPQueue)
2846 Pending.push(SU);
2847}
2848
2849/// Move the boundary of scheduled code by one cycle.
2850void SchedBoundary::bumpCycle(unsigned NextCycle) {
2851 if (SchedModel->getMicroOpBufferSize() == 0) {
2852 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2853 "MinReadyCycle uninitialized");
2854 if (MinReadyCycle > NextCycle)
2855 NextCycle = MinReadyCycle;
2856 }
2857 // Update the current micro-ops, which will issue in the next cycle.
2858 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2859 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2860
2861 // Decrement DependentLatency based on the next cycle.
2862 if ((NextCycle - CurrCycle) > DependentLatency)
2863 DependentLatency = 0;
2864 else
2865 DependentLatency -= (NextCycle - CurrCycle);
2866
2867 if (!HazardRec->isEnabled()) {
2868 // Bypass HazardRec virtual calls.
2869 CurrCycle = NextCycle;
2870 } else {
2871 // Bypass getHazardType calls in case of long latency.
2872 for (; CurrCycle != NextCycle; ++CurrCycle) {
2873 if (isTop())
2874 HazardRec->AdvanceCycle();
2875 else
2876 HazardRec->RecedeCycle();
2877 }
2878 }
2879 CheckPending = true;
2880 IsResourceLimited =
2881 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2882 getScheduledLatency(), true);
2883
2884 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2885 << '\n');
2886}
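//
// For example (made-up numbers): with an issue width of 4, CurrMOps == 6 and
// a bump of one cycle, DecMOps = 4 * 1 = 4, so CurrMOps becomes 2, and a
// DependentLatency of 1 or less drops to 0.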
2887
2888void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2889 ExecutedResCounts[PIdx] += Count;
2890 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2891 MaxExecutedResCount = ExecutedResCounts[PIdx];
2892}
2893
2894/// Add the given processor resource to this scheduled zone.
2895///
2896/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2897/// cycles during which this resource is released.
2898///
2899/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2900/// cycles at which the resource is acquired after issue (assuming no stalls).
2901///
2902/// \return the next cycle at which the instruction may execute without
2903/// oversubscribing resources.
2904unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2905 unsigned ReleaseAtCycle,
2906 unsigned NextCycle,
2907 unsigned AcquireAtCycle) {
2908 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2909 unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2910 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2911 << ReleaseAtCycle << "x" << Factor << "u\n");
2912
2913 // Update Executed resources counts.
2914  incExecutedResources(PIdx, Count);
2915  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2916 Rem->RemainingCounts[PIdx] -= Count;
2917
2918 // Check if this resource exceeds the current critical resource. If so, it
2919 // becomes the critical resource.
2920 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2921 ZoneCritResIdx = PIdx;
2922 LLVM_DEBUG(dbgs() << " *** Critical resource "
2923 << SchedModel->getResourceName(PIdx) << ": "
2924 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2925 << "c\n");
2926 }
2927 // For reserved resources, record the highest cycle using the resource.
2928 unsigned NextAvailable, InstanceIdx;
2929 std::tie(NextAvailable, InstanceIdx) =
2930 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2931 if (NextAvailable > CurrCycle) {
2932 LLVM_DEBUG(dbgs() << " Resource conflict: "
2933 << SchedModel->getResourceName(PIdx)
2934 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2935 << " reserved until @" << NextAvailable << "\n");
2936 }
2937 return NextAvailable;
2938}
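//
// For example (made-up entry): a write that holds the resource from
// AcquireAtCycle 1 to ReleaseAtCycle 4 on a resource with factor 3
// contributes Count = 3 * (4 - 1) = 9 scaled units to the executed-resource
// counts and to the critical-resource comparison above.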
2939
2940/// Move the boundary of scheduled code by one SUnit.
2941void SchedBoundary::bumpNode(SUnit *SU) {
2942  // Update the reservation table.
2943 if (HazardRec->isEnabled()) {
2944 if (!isTop() && SU->isCall) {
2945 // Calls are scheduled with their preceding instructions. For bottom-up
2946 // scheduling, clear the pipeline state before emitting.
2947 HazardRec->Reset();
2948 }
2949 HazardRec->EmitInstruction(SU);
2950 // Scheduling an instruction may have made pending instructions available.
2951 CheckPending = true;
2952 }
2953 // checkHazard should prevent scheduling multiple instructions per cycle that
2954 // exceed the issue width.
2955 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2956 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2957 assert(
2958 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2959 "Cannot schedule this instruction's MicroOps in the current cycle.");
2960
2961 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2962 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2963
2964 unsigned NextCycle = CurrCycle;
2965 switch (SchedModel->getMicroOpBufferSize()) {
2966 case 0:
2967 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2968 break;
2969 case 1:
2970 if (ReadyCycle > NextCycle) {
2971 NextCycle = ReadyCycle;
2972 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2973 }
2974 break;
2975 default:
2976 // We don't currently model the OOO reorder buffer, so consider all
2977 // scheduled MOps to be "retired". We do loosely model in-order resource
2978 // latency. If this instruction uses an in-order resource, account for any
2979 // likely stall cycles.
2980 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2981 NextCycle = ReadyCycle;
2982 break;
2983 }
2984 RetiredMOps += IncMOps;
2985
2986 // Update resource counts and critical resource.
2987 if (SchedModel->hasInstrSchedModel()) {
2988 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2989 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2990 Rem->RemIssueCount -= DecRemIssue;
2991 if (ZoneCritResIdx) {
2992 // Scale scheduled micro-ops for comparing with the critical resource.
2993 unsigned ScaledMOps =
2994 RetiredMOps * SchedModel->getMicroOpFactor();
2995
2996 // If scaled micro-ops are now more than the previous critical resource by
2997 // a full cycle, then micro-ops issue becomes critical.
2998 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2999 >= (int)SchedModel->getLatencyFactor()) {
3000 ZoneCritResIdx = 0;
3001 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
3002 << ScaledMOps / SchedModel->getLatencyFactor()
3003 << "c\n");
3004 }
3005 }
3006    for (TargetSchedModel::ProcResIter
3007           PI = SchedModel->getWriteProcResBegin(SC),
3008 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3009 unsigned RCycle =
3010 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
3011 PI->AcquireAtCycle);
3012 if (RCycle > NextCycle)
3013 NextCycle = RCycle;
3014 }
3015 if (SU->hasReservedResource) {
3016 // For reserved resources, record the highest cycle using the resource.
3017 // For top-down scheduling, this is the cycle in which we schedule this
3018      // instruction plus the number of cycles the operation reserves the
3019      // resource. For bottom-up, it is simply the instruction's cycle.
3020      for (TargetSchedModel::ProcResIter
3021             PI = SchedModel->getWriteProcResBegin(SC),
3022 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3023 unsigned PIdx = PI->ProcResourceIdx;
3024 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
3025
3026 if (SchedModel && SchedModel->enableIntervals()) {
3027 unsigned ReservedUntil, InstanceIdx;
3028 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3029 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3030 if (isTop()) {
3031            ReservedResourceSegments[InstanceIdx].add(
3032                ResourceSegments::getResourceIntervalTop(
3033                    NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3034                MIResourceCutOff);
3035          } else {
3036            ReservedResourceSegments[InstanceIdx].add(
3037                ResourceSegments::getResourceIntervalBottom(
3038                    NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3039                MIResourceCutOff);
3040          }
3041 } else {
3042
3043 unsigned ReservedUntil, InstanceIdx;
3044 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3045 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3046 if (isTop()) {
3047 ReservedCycles[InstanceIdx] =
3048 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
3049 } else
3050 ReservedCycles[InstanceIdx] = NextCycle;
3051 }
3052 }
3053 }
3054 }
3055 }
3056 // Update ExpectedLatency and DependentLatency.
3057 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
3058 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
3059 if (SU->getDepth() > TopLatency) {
3060 TopLatency = SU->getDepth();
3061 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
3062 << SU->NodeNum << ") " << TopLatency << "c\n");
3063 }
3064 if (SU->getHeight() > BotLatency) {
3065 BotLatency = SU->getHeight();
3066 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
3067 << SU->NodeNum << ") " << BotLatency << "c\n");
3068 }
3069 // If we stall for any reason, bump the cycle.
3070 if (NextCycle > CurrCycle)
3071 bumpCycle(NextCycle);
3072 else
3073 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
3074 // resource limited. If a stall occurred, bumpCycle does this.
3075 IsResourceLimited =
3076 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
3077 getScheduledLatency(), true);
3078
3079 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
3080 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
3081 // one cycle. Since we commonly reach the max MOps here, opportunistically
3082 // bump the cycle to avoid uselessly checking everything in the readyQ.
3083 CurrMOps += IncMOps;
3084
3085 // Bump the cycle count for issue group constraints.
3086  // This must be done after NextCycle has been adjusted for all other stalls.
3087 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
3088 // currCycle to X.
3089 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
3090 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
3091 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
3092 << " group\n");
3093 bumpCycle(++NextCycle);
3094 }
3095
3096 while (CurrMOps >= SchedModel->getIssueWidth()) {
3097 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
3098 << CurrCycle << '\n');
3099 bumpCycle(++NextCycle);
3100 }
3102}
3103
3104/// Release pending ready nodes in to the available queue. This makes them
3105/// visible to heuristics.
3106void SchedBoundary::releasePending() {
3107  // If the available queue is empty, it is safe to reset MinReadyCycle.
3108 if (Available.empty())
3109 MinReadyCycle = std::numeric_limits<unsigned>::max();
3110
3111 // Check to see if any of the pending instructions are ready to issue. If
3112 // so, add them to the available queue.
3113 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
3114 SUnit *SU = *(Pending.begin() + I);
3115 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
3116
3117 LLVM_DEBUG(dbgs() << "Checking pending node SU(" << SU->NodeNum << ")\n");
3118
3119 if (ReadyCycle < MinReadyCycle)
3120 MinReadyCycle = ReadyCycle;
3121
3122 if (Available.size() >= ReadyListLimit)
3123 break;
3124
3125 releaseNode(SU, ReadyCycle, true, I);
3126 if (E != Pending.size()) {
3127 --I;
3128 --E;
3129 }
3130 }
3131 CheckPending = false;
3132}
3133
3134/// Remove SU from the ready set for this boundary.
3135void SchedBoundary::removeReady(SUnit *SU) {
3136  if (Available.isInQueue(SU))
3137 Available.remove(Available.find(SU));
3138 else {
3139 assert(Pending.isInQueue(SU) && "bad ready count");
3140 Pending.remove(Pending.find(SU));
3141 }
3142}
3143
3144/// If this queue only has one ready candidate, return it. As a side effect,
3145/// defer any nodes that now hit a hazard, and advance the cycle until at least
3146/// one node is ready. If multiple instructions are ready, return NULL.
3147SUnit *SchedBoundary::pickOnlyChoice() {
3148  if (CheckPending)
3149    releasePending();
3150
3151 // Defer any ready instrs that now have a hazard.
3152 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
3153 if (checkHazard(*I)) {
3154 Pending.push(*I);
3155 I = Available.remove(I);
3156 continue;
3157 }
3158 ++I;
3159 }
3160 for (unsigned i = 0; Available.empty(); ++i) {
3161// FIXME: Re-enable assert once PR20057 is resolved.
3162// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
3163// "permanent hazard");
3164 (void)i;
3165 bumpCycle(CurrCycle + 1);
3166    releasePending();
3167  }
3168
3169 LLVM_DEBUG(Pending.dump());
3170 LLVM_DEBUG(Available.dump());
3171
3172 if (Available.size() == 1)
3173 return *Available.begin();
3174 return nullptr;
3175}
3176
3177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3178
3179/// Dump the content of the \ref ReservedCycles vector for the
3180/// resources that are used in the basic block.
3181///
3182LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
3183  if (!SchedModel->hasInstrSchedModel())
3184 return;
3185
3186 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
3187 unsigned StartIdx = 0;
3188
3189 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3190 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
3191 std::string ResName = SchedModel->getResourceName(ResIdx);
3192 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3193 dbgs() << ResName << "(" << UnitIdx << ") = ";
3194 if (SchedModel && SchedModel->enableIntervals()) {
3195 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3196 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3197 else
3198 dbgs() << "{ }\n";
3199 } else
3200 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
3201 }
3202 StartIdx += NumUnits;
3203 }
3204}
3205
3206// This is useful information to dump after bumpNode.
3207// Note that the Queue contents are more useful before pickNodeFromQueue.
3208LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
3209  unsigned ResFactor;
3210 unsigned ResCount;
3211 if (ZoneCritResIdx) {
3212 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
3213 ResCount = getResourceCount(ZoneCritResIdx);
3214 } else {
3215 ResFactor = SchedModel->getMicroOpFactor();
3216 ResCount = RetiredMOps * ResFactor;
3217 }
3218 unsigned LFactor = SchedModel->getLatencyFactor();
3219 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
3220 << " Retired: " << RetiredMOps;
3221 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
3222 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
3223 << ResCount / ResFactor << " "
3224 << SchedModel->getResourceName(ZoneCritResIdx)
3225 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
3226 << (IsResourceLimited ? " - Resource" : " - Latency")
3227 << " limited.\n";
3230}
3231#endif
3232
3233//===----------------------------------------------------------------------===//
3234// GenericScheduler - Generic implementation of MachineSchedStrategy.
3235//===----------------------------------------------------------------------===//
3236
3237void GenericSchedulerBase::SchedCandidate::
3238initResourceDelta(const ScheduleDAGMI *DAG,
3239                  const TargetSchedModel *SchedModel) {
3240  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
3241 return;
3242
3243 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
3244  for (TargetSchedModel::ProcResIter
3245         PI = SchedModel->getWriteProcResBegin(SC),
3246 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3247 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
3248 ResDelta.CritResources += PI->ReleaseAtCycle;
3249 if (PI->ProcResourceIdx == Policy.DemandResIdx)
3250 ResDelta.DemandedResources += PI->ReleaseAtCycle;
3251 }
3252}
3253
3254/// Compute remaining latency. We need this both to determine whether the
3255/// overall schedule has become latency-limited and whether the instructions
3256/// outside this zone are resource or latency limited.
3257///
3258/// The "dependent" latency is updated incrementally during scheduling as the
3259/// max height/depth of scheduled nodes minus the cycles since it was
3260/// scheduled:
3261/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
3262///
3263/// The "independent" latency is the max ready queue depth:
3264/// ILat = max N.depth for N in Available|Pending
3265///
3266/// RemainingLatency is the greater of independent and dependent latency.
3267///
3268/// These computations are expensive, especially in DAGs with many edges, so
3269/// only do them if necessary.
3270static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3271 unsigned RemLatency = CurrZone.getDependentLatency();
3272 RemLatency = std::max(RemLatency,
3273 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3274 RemLatency = std::max(RemLatency,
3275 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3276 return RemLatency;
3277}
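//
// For example (made-up zone state): with a DependentLatency of 6, a deepest
// available node of unscheduled latency 9 and a deepest pending node of 4,
// the remaining latency is max(6, 9, 4) = 9.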
3278
3279/// Returns true if the current cycle plus remaining latency is greater than
3280/// the critical path in the scheduling region.
3281bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3282 SchedBoundary &CurrZone,
3283 bool ComputeRemLatency,
3284 unsigned &RemLatency) const {
3285 // The current cycle is already greater than the critical path, so we are
3286 // already latency limited and don't need to compute the remaining latency.
3287 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3288 return true;
3289
3290 // If we haven't scheduled anything yet, then we aren't latency limited.
3291 if (CurrZone.getCurrCycle() == 0)
3292 return false;
3293
3294 if (ComputeRemLatency)
3295 RemLatency = computeRemLatency(CurrZone);
3296
3297 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3298}
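//
// For example (made-up numbers): with CurrCycle == 10, RemLatency == 15 and
// Rem.CriticalPath == 20, 15 + 10 > 20 holds and the caller should start
// reducing latency; at CurrCycle == 0 the function always returns false.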
3299
3300/// Set the CandPolicy given a scheduling zone given the current resources and
3301/// latencies inside and outside the zone.
3302void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3303                                     SchedBoundary &CurrZone,
3304 SchedBoundary *OtherZone) {
3305 // Apply preemptive heuristics based on the total latency and resources
3306 // inside and outside this zone. Potential stalls should be considered before
3307 // following this policy.
3308
3309 // Compute the critical resource outside the zone.
3310 unsigned OtherCritIdx = 0;
3311 unsigned OtherCount =
3312 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3313
3314 bool OtherResLimited = false;
3315 unsigned RemLatency = 0;
3316 bool RemLatencyComputed = false;
3317 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3318 RemLatency = computeRemLatency(CurrZone);
3319 RemLatencyComputed = true;
3320 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3321 OtherCount, RemLatency, false);
3322 }
3323
3324 // Schedule aggressively for latency in PostRA mode. We don't check for
3325 // acyclic latency during PostRA, and highly out-of-order processors will
3326 // skip PostRA scheduling.
3327 if (!OtherResLimited &&
3328 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3329 RemLatency))) {
3330 Policy.ReduceLatency |= true;
3331 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3332 << " RemainingLatency " << RemLatency << " + "
3333 << CurrZone.getCurrCycle() << "c > CritPath "
3334 << Rem.CriticalPath << "\n");
3335 }
3336 // If the same resource is limiting inside and outside the zone, do nothing.
3337 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3338 return;
3339
3340 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3341 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3342 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3343 } if (OtherResLimited) dbgs()
3344 << " RemainingLimit: "
3345 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3346 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3347 << " Latency limited both directions.\n");
3348
3349 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3350 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3351
3352 if (OtherResLimited)
3353 Policy.DemandResIdx = OtherCritIdx;
3354}
3355
3356#ifndef NDEBUG
3357const char *GenericSchedulerBase::getReasonStr(
3358  GenericSchedulerBase::CandReason Reason) {
3359  // clang-format off
3360 switch (Reason) {
3361 case NoCand: return "NOCAND ";
3362 case Only1: return "ONLY1 ";
3363 case PhysReg: return "PHYS-REG ";
3364 case RegExcess: return "REG-EXCESS";
3365 case RegCritical: return "REG-CRIT ";
3366 case Stall: return "STALL ";
3367 case Cluster: return "CLUSTER ";
3368 case Weak: return "WEAK ";
3369 case RegMax: return "REG-MAX ";
3370 case ResourceReduce: return "RES-REDUCE";
3371 case ResourceDemand: return "RES-DEMAND";
3372 case TopDepthReduce: return "TOP-DEPTH ";
3373 case TopPathReduce: return "TOP-PATH ";
3374 case BotHeightReduce:return "BOT-HEIGHT";
3375 case BotPathReduce: return "BOT-PATH ";
3376 case NodeOrder: return "ORDER ";
3377 case FirstValid: return "FIRST ";
3378 };
3379 // clang-format on
3380 llvm_unreachable("Unknown reason!");
3381}
3382
3383void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3384  PressureChange P;
3385  unsigned ResIdx = 0;
3386 unsigned Latency = 0;
3387 switch (Cand.Reason) {
3388 default:
3389 break;
3390 case RegExcess:
3391 P = Cand.RPDelta.Excess;
3392 break;
3393 case RegCritical:
3394 P = Cand.RPDelta.CriticalMax;
3395 break;
3396 case RegMax:
3397 P = Cand.RPDelta.CurrentMax;
3398 break;
3399 case ResourceReduce:
3400 ResIdx = Cand.Policy.ReduceResIdx;
3401 break;
3402 case ResourceDemand:
3403 ResIdx = Cand.Policy.DemandResIdx;
3404 break;
3405 case TopDepthReduce:
3406 Latency = Cand.SU->getDepth();
3407 break;
3408 case TopPathReduce:
3409 Latency = Cand.SU->getHeight();
3410 break;
3411 case BotHeightReduce:
3412 Latency = Cand.SU->getHeight();
3413 break;
3414 case BotPathReduce:
3415 Latency = Cand.SU->getDepth();
3416 break;
3417 }
3418 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3419 if (P.isValid())
3420 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3421 << ":" << P.getUnitInc() << " ";
3422 else
3423 dbgs() << " ";
3424 if (ResIdx)
3425 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3426 else
3427 dbgs() << " ";
3428 if (Latency)
3429 dbgs() << " " << Latency << " cycles ";
3430 else
3431 dbgs() << " ";
3432 dbgs() << '\n';
3433}
3434#endif
3435
3436/// Return true if this heuristic determines order.
3437/// TODO: Consider refactoring the return type of these functions to an integer or enum,
3438/// as we may need to differentiate whether TryCand is better than Cand.
3439bool llvm::tryLess(int TryVal, int CandVal,
3440                   GenericSchedulerBase::SchedCandidate &TryCand,
3441                   GenericSchedulerBase::SchedCandidate &Cand,
3442                   GenericSchedulerBase::CandReason Reason) {
3443 if (TryVal < CandVal) {
3444 TryCand.Reason = Reason;
3445 return true;
3446 }
3447 if (TryVal > CandVal) {
3448 if (Cand.Reason > Reason)
3449 Cand.Reason = Reason;
3450 return true;
3451 }
3452 return false;
3453}
3454
3455bool llvm::tryGreater(int TryVal, int CandVal,
3456                      GenericSchedulerBase::SchedCandidate &TryCand,
3457                      GenericSchedulerBase::SchedCandidate &Cand,
3458                      GenericSchedulerBase::CandReason Reason) {
3459 if (TryVal > CandVal) {
3460 TryCand.Reason = Reason;
3461 return true;
3462 }
3463 if (TryVal < CandVal) {
3464 if (Cand.Reason > Reason)
3465 Cand.Reason = Reason;
3466 return true;
3467 }
3468 return false;
3469}
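// Illustrative example with hypothetical values: calling tryLess(2, 5, TryCand,
// Cand, Reason) marks TryCand as the winner for Reason and returns true; calling
// tryLess(5, 2, ...) also returns true (the heuristic decided the order) but Cand
// remains the better candidate, and Cand.Reason is lowered to Reason if Reason is
// stronger (numerically smaller). Only a tie returns false, so control falls
// through to the next heuristic in tryCandidate(). tryGreater() is the mirror
// image for "bigger is better" comparisons.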
3470
3471bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3472                      GenericSchedulerBase::SchedCandidate &Cand,
3473                      SchedBoundary &Zone) {
3474 if (Zone.isTop()) {
3475 // Prefer the candidate with the lesser depth, but only if one of them has
3476 // depth greater than the total latency scheduled so far, otherwise either
3477 // of them could be scheduled now with no stall.
3478 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3479 Zone.getScheduledLatency()) {
3480      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3481                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3482 return true;
3483 }
3484    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3485                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3486 return true;
3487 } else {
3488 // Prefer the candidate with the lesser height, but only if one of them has
3489 // height greater than the total latency scheduled so far, otherwise either
3490 // of them could be scheduled now with no stall.
3491 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3492 Zone.getScheduledLatency()) {
3493      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3494                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3495 return true;
3496 }
3497    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3498                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3499 return true;
3500 }
3501 return false;
3502}
3503
3504static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
3505 bool IsPostRA = false) {
3506 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3507 << GenericSchedulerBase::getReasonStr(Reason) << " ["
3508 << (IsPostRA ? "post-RA" : "pre-RA") << "]\n");
3509
3510 if (IsPostRA) {
3511 if (IsTop)
3512 NumTopPostRA++;
3513 else
3514 NumBotPostRA++;
3515
3516 switch (Reason) {
3517    case GenericSchedulerBase::NoCand:
3518      NumNoCandPostRA++;
3519      return;
3520    case GenericSchedulerBase::Only1:
3521      NumOnly1PostRA++;
3522      return;
3523    case GenericSchedulerBase::PhysReg:
3524      NumPhysRegPostRA++;
3525      return;
3526    case GenericSchedulerBase::RegExcess:
3527      NumRegExcessPostRA++;
3528      return;
3529    case GenericSchedulerBase::RegCritical:
3530      NumRegCriticalPostRA++;
3531      return;
3532    case GenericSchedulerBase::Stall:
3533      NumStallPostRA++;
3534      return;
3535    case GenericSchedulerBase::Cluster:
3536      NumClusterPostRA++;
3537      return;
3538    case GenericSchedulerBase::Weak:
3539      NumWeakPostRA++;
3540      return;
3541    case GenericSchedulerBase::RegMax:
3542      NumRegMaxPostRA++;
3543      return;
3544    case GenericSchedulerBase::ResourceReduce:
3545      NumResourceReducePostRA++;
3546      return;
3547    case GenericSchedulerBase::ResourceDemand:
3548      NumResourceDemandPostRA++;
3549      return;
3550    case GenericSchedulerBase::TopDepthReduce:
3551      NumTopDepthReducePostRA++;
3552      return;
3553    case GenericSchedulerBase::TopPathReduce:
3554      NumTopPathReducePostRA++;
3555      return;
3556    case GenericSchedulerBase::BotHeightReduce:
3557      NumBotHeightReducePostRA++;
3558      return;
3559    case GenericSchedulerBase::BotPathReduce:
3560      NumBotPathReducePostRA++;
3561      return;
3562    case GenericSchedulerBase::NodeOrder:
3563      NumNodeOrderPostRA++;
3564      return;
3565    case GenericSchedulerBase::FirstValid:
3566      NumFirstValidPostRA++;
3567      return;
3568    };
3569 } else {
3570 if (IsTop)
3571 NumTopPreRA++;
3572 else
3573 NumBotPreRA++;
3574
3575 switch (Reason) {
3576    case GenericSchedulerBase::NoCand:
3577      NumNoCandPreRA++;
3578      return;
3579    case GenericSchedulerBase::Only1:
3580      NumOnly1PreRA++;
3581      return;
3582    case GenericSchedulerBase::PhysReg:
3583      NumPhysRegPreRA++;
3584      return;
3585    case GenericSchedulerBase::RegExcess:
3586      NumRegExcessPreRA++;
3587      return;
3588    case GenericSchedulerBase::RegCritical:
3589      NumRegCriticalPreRA++;
3590      return;
3591    case GenericSchedulerBase::Stall:
3592      NumStallPreRA++;
3593      return;
3594    case GenericSchedulerBase::Cluster:
3595      NumClusterPreRA++;
3596      return;
3597    case GenericSchedulerBase::Weak:
3598      NumWeakPreRA++;
3599      return;
3600    case GenericSchedulerBase::RegMax:
3601      NumRegMaxPreRA++;
3602      return;
3603    case GenericSchedulerBase::ResourceReduce:
3604      NumResourceReducePreRA++;
3605      return;
3606    case GenericSchedulerBase::ResourceDemand:
3607      NumResourceDemandPreRA++;
3608      return;
3609    case GenericSchedulerBase::TopDepthReduce:
3610      NumTopDepthReducePreRA++;
3611      return;
3612    case GenericSchedulerBase::TopPathReduce:
3613      NumTopPathReducePreRA++;
3614      return;
3615    case GenericSchedulerBase::BotHeightReduce:
3616      NumBotHeightReducePreRA++;
3617      return;
3618    case GenericSchedulerBase::BotPathReduce:
3619      NumBotPathReducePreRA++;
3620      return;
3621    case GenericSchedulerBase::NodeOrder:
3622      NumNodeOrderPreRA++;
3623      return;
3624    case GenericSchedulerBase::FirstValid:
3625      NumFirstValidPreRA++;
3626      return;
3627    };
3628 }
3629 llvm_unreachable("Unknown reason!");
3630}
3631
3632static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
3633                      bool IsPostRA = false) {
3634 tracePick(Cand.Reason, Cand.AtTop, IsPostRA);
3635}
3636
3637void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3638  assert(dag->hasVRegLiveness() &&
3639 "(PreRA)GenericScheduler needs vreg liveness");
3640 DAG = static_cast<ScheduleDAGMILive*>(dag);
3641 SchedModel = DAG->getSchedModel();
3642 TRI = DAG->TRI;
3643
3644 if (RegionPolicy.ComputeDFSResult)
3645 DAG->computeDFSResult();
3646
3647 Rem.init(DAG, SchedModel);
3648 Top.init(DAG, SchedModel, &Rem);
3649 Bot.init(DAG, SchedModel, &Rem);
3650
3651 // Initialize resource counts.
3652
3653 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3654 // are disabled, then these HazardRecs will be disabled.
3655 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3656 if (!Top.HazardRec) {
3657 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3658 }
3659 if (!Bot.HazardRec) {
3660 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3661 }
3662 TopCand.SU = nullptr;
3663 BotCand.SU = nullptr;
3664
3667}
3668
3669/// Initialize the per-region scheduling policy.
3670void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3671                                  MachineBasicBlock::iterator End,
3672                                  unsigned NumRegionInstrs) {
3673 const MachineFunction &MF = *Begin->getMF();
3674 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3675
3676 // Avoid setting up the register pressure tracker for small regions to save
3677 // compile time. As a rough heuristic, only track pressure when the number of
3678  // schedulable instructions exceeds half the size of the allocatable integer
3679  // register file for the largest legal integer register type.
3680 RegionPolicy.ShouldTrackPressure = true;
3681  for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3682    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3683 if (TLI->isTypeLegal(LegalIntVT)) {
3684 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3685 TLI->getRegClassFor(LegalIntVT));
3686 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3687 break;
3688 }
3689 }
3690
3691 // For generic targets, we default to bottom-up, because it's simpler and more
3692 // compile-time optimizations have been implemented in that direction.
3693 RegionPolicy.OnlyBottomUp = true;
3694
3695 // Allow the subtarget to override default policy.
3696 SchedRegion Region(Begin, End, NumRegionInstrs);
3698
3699 // After subtarget overrides, apply command line options.
3700 if (!EnableRegPressure) {
3701 RegionPolicy.ShouldTrackPressure = false;
3702 RegionPolicy.ShouldTrackLaneMasks = false;
3703 }
3704
3705  if (PreRADirection == MISched::TopDown) {
3706    RegionPolicy.OnlyTopDown = true;
3707 RegionPolicy.OnlyBottomUp = false;
3708 } else if (PreRADirection == MISched::BottomUp) {
3709 RegionPolicy.OnlyTopDown = false;
3710 RegionPolicy.OnlyBottomUp = true;
3711 } else if (PreRADirection == MISched::Bidirectional) {
3712 RegionPolicy.OnlyBottomUp = false;
3713 RegionPolicy.OnlyTopDown = false;
3714 }
3715
3716 BotIdx = NumRegionInstrs - 1;
3717 this->NumRegionInstrs = NumRegionInstrs;
3718}
3719
3720void GenericScheduler::dumpPolicy() const {
3721  // Cannot completely remove virtual function even in release mode.
3722#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3723 dbgs() << "GenericScheduler RegionPolicy: "
3724 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3725 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3726 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3727 << "\n";
3728#endif
3729}
3730
3731/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3732/// critical path by more cycles than it takes to drain the instruction buffer.
3733/// We estimate an upper bound on in-flight instructions as:
3734///
3735/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3736/// InFlightIterations = AcyclicPath / CyclesPerIteration
3737/// InFlightResources = InFlightIterations * LoopResources
3738///
3739/// TODO: Check execution resources in addition to IssueCount.
3740void GenericScheduler::checkAcyclicLatency() {
3741  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3742 return;
3743
3744 // Scaled number of cycles per loop iteration.
3745 unsigned IterCount =
3746 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3747 Rem.RemIssueCount);
3748 // Scaled acyclic critical path.
3749 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3750 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3751 unsigned InFlightCount =
3752 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3753 unsigned BufferLimit =
3754 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3755
3756 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3757
3758 LLVM_DEBUG(
3759 dbgs() << "IssueCycles="
3760 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3761 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3762 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3763 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3764 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3765 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3766}
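// Illustrative example with hypothetical numbers, following the formulas in the
// comment above: with a latency factor of 1, CyclicCritPath = 8 cycles, an
// acyclic CriticalPath of 40 cycles, RemIssueCount = 10 micro-ops, and a
// micro-op buffer of 16, IterCount = max(8, 10) = 10 and
// InFlightCount = ceil(40 * 10 / 10) = 40, which exceeds BufferLimit = 16, so
// IsAcyclicLatencyLimited is set and this loop is scheduled aggressively for
// latency.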
3767
3768void GenericScheduler::registerRoots() {
3769  Rem.CriticalPath = DAG->ExitSU.getDepth();
3770
3771 // Some roots may not feed into ExitSU. Check all of them in case.
3772 for (const SUnit *SU : Bot.Available) {
3773 if (SU->getDepth() > Rem.CriticalPath)
3774 Rem.CriticalPath = SU->getDepth();
3775 }
3776 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3777  if (DumpCriticalPathLength) {
3778    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3779 }
3780
3781 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3782    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3783    checkAcyclicLatency();
3784 }
3785}
3786
3787bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP,
3788                       GenericSchedulerBase::SchedCandidate &TryCand,
3789                       GenericSchedulerBase::SchedCandidate &Cand,
3790                       GenericSchedulerBase::CandReason Reason,
3791                       const TargetRegisterInfo *TRI,
3792 const MachineFunction &MF) {
3793 // If one candidate decreases and the other increases, go with it.
3794 // Invalid candidates have UnitInc==0.
3795 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3796 Reason)) {
3797 return true;
3798 }
3799 // Do not compare the magnitude of pressure changes between top and bottom
3800 // boundary.
3801 if (Cand.AtTop != TryCand.AtTop)
3802 return false;
3803
3804 // If both candidates affect the same set in the same boundary, go with the
3805 // smallest increase.
3806 unsigned TryPSet = TryP.getPSetOrMax();
3807 unsigned CandPSet = CandP.getPSetOrMax();
3808 if (TryPSet == CandPSet) {
3809 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3810 Reason);
3811 }
3812
3813 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3814 std::numeric_limits<int>::max();
3815
3816 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3817 std::numeric_limits<int>::max();
3818
3819 // If the candidates are decreasing pressure, reverse priority.
3820 if (TryP.getUnitInc() < 0)
3821 std::swap(TryRank, CandRank);
3822 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3823}
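// Illustrative example with hypothetical values: if TryP increases some pressure
// set by +1 while CandP decreases it by -1, the first tryGreater() call already
// prefers the decreasing candidate. If both candidates change the same pressure
// set, the smaller increase wins via tryLess(); only when they touch different
// sets does the target's pressure-set score decide, with the comparison reversed
// when the candidates are decreasing pressure.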
3824
3825unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) {
3826 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3827}
3828
3829/// Minimize physical register live ranges. Regalloc wants them adjacent to
3830/// their physreg def/use.
3831///
3832/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3833/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3834/// with the operation that produces or consumes the physreg. We'll do this when
3835/// regalloc has support for parallel copies.
3836int llvm::biasPhysReg(const SUnit *SU, bool isTop) {
3837 const MachineInstr *MI = SU->getInstr();
3838
3839 if (MI->isCopy()) {
3840 unsigned ScheduledOper = isTop ? 1 : 0;
3841 unsigned UnscheduledOper = isTop ? 0 : 1;
3842    // If we have already scheduled the physreg producer/consumer, immediately
3843 // schedule the copy.
3844 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3845 return 1;
3846 // If the physreg is at the boundary, defer it. Otherwise schedule it
3847 // immediately to free the dependent. We can hoist the copy later.
3848 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3849 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3850 return AtBoundary ? -1 : 1;
3851 }
3852
3853 if (MI->isMoveImmediate()) {
3854 // If we have a move immediate and all successors have been assigned, bias
3855 // towards scheduling this later. Make sure all register defs are to
3856 // physical registers.
3857 bool DoBias = true;
3858 for (const MachineOperand &Op : MI->defs()) {
3859 if (Op.isReg() && !Op.getReg().isPhysical()) {
3860 DoBias = false;
3861 break;
3862 }
3863 }
3864
3865 if (DoBias)
3866 return isTop ? -1 : 1;
3867 }
3868
3869 return 0;
3870}
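// Illustrative example: when scheduling bottom-up (isTop == false), a copy such
// as $physreg = COPY %vreg has the physical register on the already-scheduled
// side (operand 0), so biasPhysReg() returns 1 and the copy is pulled next to its
// scheduled physreg consumer. For %vreg = COPY $physreg the physreg producer is
// still unscheduled (operand 1): the copy is deferred (-1) when it sits at the
// region boundary with no predecessors left, and favored (+1) otherwise.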
3871
3872void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3873                                     bool AtTop,
3874 const RegPressureTracker &RPTracker,
3875 RegPressureTracker &TempTracker) {
3876 Cand.SU = SU;
3877 Cand.AtTop = AtTop;
3878 if (DAG->isTrackingPressure()) {
3879 if (AtTop) {
3880 TempTracker.getMaxDownwardPressureDelta(
3881 Cand.SU->getInstr(),
3882 Cand.RPDelta,
3883 DAG->getRegionCriticalPSets(),
3884 DAG->getRegPressure().MaxSetPressure);
3885 } else {
3886 if (VerifyScheduling) {
3887 TempTracker.getMaxUpwardPressureDelta(
3888 Cand.SU->getInstr(),
3889 &DAG->getPressureDiff(Cand.SU),
3890 Cand.RPDelta,
3891 DAG->getRegionCriticalPSets(),
3892 DAG->getRegPressure().MaxSetPressure);
3893 } else {
3894 RPTracker.getUpwardPressureDelta(
3895 Cand.SU->getInstr(),
3896 DAG->getPressureDiff(Cand.SU),
3897 Cand.RPDelta,
3898 DAG->getRegionCriticalPSets(),
3899 DAG->getRegPressure().MaxSetPressure);
3900 }
3901 }
3902 }
3903 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3904 << " Try SU(" << Cand.SU->NodeNum << ") "
3905 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3906 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3907}
3908
3909/// Apply a set of heuristics to a new candidate. Heuristics are currently
3910/// hierarchical. This may be more efficient than a graduated cost model because
3911/// we don't need to evaluate all aspects of the model for each node in the
3912/// queue. But it's really done to make the heuristics easier to debug and
3913/// statistically analyze.
3914///
3915/// \param Cand provides the policy and current best candidate.
3916/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3917/// \param Zone describes the scheduled zone that we are extending, or nullptr
3918/// if Cand is from a different zone than TryCand.
3919/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3920bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3921                                    SchedCandidate &TryCand,
3922 SchedBoundary *Zone) const {
3923 // Initialize the candidate if needed.
3924 if (!Cand.isValid()) {
3925 TryCand.Reason = FirstValid;
3926 return true;
3927 }
3928
3929  // Bias PhysReg defs and copies towards their uses and definitions, respectively.
3930 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3931 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3932 return TryCand.Reason != NoCand;
3933
3934 // Avoid exceeding the target's limit.
3935 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3936 Cand.RPDelta.Excess,
3937 TryCand, Cand, RegExcess, TRI,
3938 DAG->MF))
3939 return TryCand.Reason != NoCand;
3940
3941 // Avoid increasing the max critical pressure in the scheduled region.
3942 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3943 Cand.RPDelta.CriticalMax,
3944 TryCand, Cand, RegCritical, TRI,
3945 DAG->MF))
3946 return TryCand.Reason != NoCand;
3947
3948 // We only compare a subset of features when comparing nodes between
3949 // Top and Bottom boundary. Some properties are simply incomparable, in many
3950 // other instances we should only override the other boundary if something
3951 // is a clear good pick on one boundary. Skip heuristics that are more
3952 // "tie-breaking" in nature.
3953 bool SameBoundary = Zone != nullptr;
3954 if (SameBoundary) {
3955 // For loops that are acyclic path limited, aggressively schedule for
3956    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3957 // heuristics to take precedence.
3958 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3959 tryLatency(TryCand, Cand, *Zone))
3960 return TryCand.Reason != NoCand;
3961
3962 // Prioritize instructions that read unbuffered resources by stall cycles.
3963 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3964 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3965 return TryCand.Reason != NoCand;
3966 }
3967
3968 // Keep clustered nodes together to encourage downstream peephole
3969 // optimizations which may reduce resource requirements.
3970 //
3971 // This is a best effort to set things up for a post-RA pass. Optimizations
3972 // like generating loads of multiple registers should ideally be done within
3973 // the scheduler pass by combining the loads during DAG postprocessing.
3974 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
3975 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
3976 bool CandIsClusterSucc =
3977 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
3978 bool TryCandIsClusterSucc =
3979 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
3980
3981 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
3982 Cluster))
3983 return TryCand.Reason != NoCand;
3984
3985 if (SameBoundary) {
3986 // Weak edges are for clustering and other constraints.
3987 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3988 getWeakLeft(Cand.SU, Cand.AtTop),
3989 TryCand, Cand, Weak))
3990 return TryCand.Reason != NoCand;
3991 }
3992
3993 // Avoid increasing the max pressure of the entire region.
3994 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3995 Cand.RPDelta.CurrentMax,
3996 TryCand, Cand, RegMax, TRI,
3997 DAG->MF))
3998 return TryCand.Reason != NoCand;
3999
4000 if (SameBoundary) {
4001    // Avoid critical resource consumption and balance the schedule.
4002    TryCand.initResourceDelta(DAG, SchedModel);
4003    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
4004                TryCand, Cand, ResourceReduce))
4005      return TryCand.Reason != NoCand;
4006    if (tryGreater(TryCand.ResDelta.DemandedResources,
4007                   Cand.ResDelta.DemandedResources,
4008                   TryCand, Cand, ResourceDemand))
4009      return TryCand.Reason != NoCand;
4010
4011 // Avoid serializing long latency dependence chains.
4012 // For acyclic path limited loops, latency was already checked above.
4013 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
4014 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
4015 return TryCand.Reason != NoCand;
4016
4017 // Fall through to original instruction order.
4018 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
4019 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
4020 TryCand.Reason = NodeOrder;
4021 return true;
4022 }
4023 }
4024
4025 return false;
4026}
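// In rough order, the checks above prefer: biasing physreg copies, avoiding
// excess and then critical register pressure, (within a single boundary)
// acyclic-latency and stall avoidance, keeping clustered memory operations
// together, weak edges, limiting the region's max pressure, balancing critical
// and demanded resources, reducing latency, and finally falling back to the
// original instruction order (NodeOrder) as the last tie-breaker.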
4027
4028/// Pick the best candidate from the queue.
4029///
4030/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
4031/// DAG building. To adjust for the current scheduling location we need to
4032/// maintain the number of vreg uses remaining to be top-scheduled.
4033void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
4034                                         const CandPolicy &ZonePolicy,
4035 const RegPressureTracker &RPTracker,
4036 SchedCandidate &Cand) {
4037 // getMaxPressureDelta temporarily modifies the tracker.
4038 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
4039
4040 ReadyQueue &Q = Zone.Available;
4041 for (SUnit *SU : Q) {
4042
4043 SchedCandidate TryCand(ZonePolicy);
4044 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
4045 // Pass SchedBoundary only when comparing nodes from the same boundary.
4046 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
4047 if (tryCandidate(Cand, TryCand, ZoneArg)) {
4048 // Initialize resource delta if needed in case future heuristics query it.
4049      if (TryCand.ResDelta == SchedResourceDelta())
4050        TryCand.initResourceDelta(DAG, SchedModel);
4051      Cand.setBest(TryCand);
4052      LLVM_DEBUG(traceCandidate(Cand));
4053    }
4054 }
4055}
4056
4057/// Pick the best candidate node from either the top or bottom queue.
4058SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4059  // Schedule as far as possible in the direction of no choice. This is most
4060 // efficient, but also provides the best heuristics for CriticalPSets.
4061 if (SUnit *SU = Bot.pickOnlyChoice()) {
4062 IsTopNode = false;
4063 tracePick(Only1, /*IsTopNode=*/false);
4064 return SU;
4065 }
4066 if (SUnit *SU = Top.pickOnlyChoice()) {
4067 IsTopNode = true;
4068 tracePick(Only1, /*IsTopNode=*/true);
4069 return SU;
4070 }
4071 // Set the bottom-up policy based on the state of the current bottom zone and
4072 // the instructions outside the zone, including the top zone.
4073 CandPolicy BotPolicy;
4074 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
4075 // Set the top-down policy based on the state of the current top zone and
4076 // the instructions outside the zone, including the bottom zone.
4077 CandPolicy TopPolicy;
4078 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
4079
4080 // See if BotCand is still valid (because we previously scheduled from Top).
4081 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4082 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4083 BotCand.Policy != BotPolicy) {
4084 BotCand.reset(CandPolicy());
4085 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
4086 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4087 } else {
4089#ifndef NDEBUG
4090 if (VerifyScheduling) {
4091 SchedCandidate TCand;
4092 TCand.reset(CandPolicy());
4093 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
4094 assert(TCand.SU == BotCand.SU &&
4095 "Last pick result should correspond to re-picking right now");
4096 }
4097#endif
4098 }
4099
4100 // Check if the top Q has a better candidate.
4101 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4102 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4103 TopCand.Policy != TopPolicy) {
4104 TopCand.reset(CandPolicy());
4105 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
4106 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4107 } else {
4109#ifndef NDEBUG
4110 if (VerifyScheduling) {
4111 SchedCandidate TCand;
4112 TCand.reset(CandPolicy());
4113 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
4114 assert(TCand.SU == TopCand.SU &&
4115 "Last pick result should correspond to re-picking right now");
4116 }
4117#endif
4118 }
4119
4120 // Pick best from BotCand and TopCand.
4121 assert(BotCand.isValid());
4122 assert(TopCand.isValid());
4123 SchedCandidate Cand = BotCand;
4124 TopCand.Reason = NoCand;
4125 if (tryCandidate(Cand, TopCand, nullptr)) {
4126 Cand.setBest(TopCand);
4128 }
4129
4130 IsTopNode = Cand.AtTop;
4131 tracePick(Cand);
4132 return Cand.SU;
4133}
4134
4135/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
4136SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
4137  if (DAG->top() == DAG->bottom()) {
4138 assert(Top.Available.empty() && Top.Pending.empty() &&
4139 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4140 return nullptr;
4141 }
4142 SUnit *SU;
4143 if (RegionPolicy.OnlyTopDown) {
4144 SU = Top.pickOnlyChoice();
4145 if (!SU) {
4146 CandPolicy NoPolicy;
4147 TopCand.reset(NoPolicy);
4148 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
4149 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4151 SU = TopCand.SU;
4152 }
4153 IsTopNode = true;
4154 } else if (RegionPolicy.OnlyBottomUp) {
4155 SU = Bot.pickOnlyChoice();
4156 if (!SU) {
4157 CandPolicy NoPolicy;
4158 BotCand.reset(NoPolicy);
4159 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
4160 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4162 SU = BotCand.SU;
4163 }
4164 IsTopNode = false;
4165 } else {
4166 SU = pickNodeBidirectional(IsTopNode);
4167 }
4168 assert(!SU->isScheduled && "SUnit scheduled twice.");
4169
4170 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
4171 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
4172 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
4173 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
4174 //
4175 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
4176 // isBottomReady. That is, it didn't factor into the decision to choose SU
4177 // because it isTopReady or isBottomReady, respectively. In fact, if the
4178 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
4179  // queues respectively contain the original roots and don't get updated when
4180  // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
4181  // because we scheduled everything but the top roots. Conversely, if SU
4182  // isBottomReady on OnlyTopDown, then it was because we scheduled everything
4183  // but the bottom roots. If it's in a queue even coincidentally, it should be
4184 // removed so it does not get re-picked in a subsequent pickNode call.
4185 if (SU->isTopReady())
4186 Top.removeReady(SU);
4187 if (SU->isBottomReady())
4188 Bot.removeReady(SU);
4189
4190 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4191 << *SU->getInstr());
4192
4193 if (IsTopNode) {
4194 if (SU->NodeNum == TopIdx++)
4195 ++NumInstrsInSourceOrderPreRA;
4196 } else {
4197 assert(BotIdx < NumRegionInstrs && "out of bounds");
4198 if (SU->NodeNum == BotIdx--)
4199 ++NumInstrsInSourceOrderPreRA;
4200 }
4201
4202 NumInstrsScheduledPreRA += 1;
4203
4204 return SU;
4205}
4206
4207void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
4208  MachineBasicBlock::iterator InsertPos = SU->getInstr();
4209 if (!isTop)
4210 ++InsertPos;
4211 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
4212
4213 // Find already scheduled copies with a single physreg dependence and move
4214 // them just above the scheduled instruction.
4215 for (SDep &Dep : Deps) {
4216 if (Dep.getKind() != SDep::Data || !Dep.getReg().isPhysical())
4217 continue;
4218 SUnit *DepSU = Dep.getSUnit();
4219 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
4220 continue;
4221 MachineInstr *Copy = DepSU->getInstr();
4222 if (!Copy->isCopy() && !Copy->isMoveImmediate())
4223 continue;
4224 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
4225 DAG->dumpNode(*Dep.getSUnit()));
4226 DAG->moveInstruction(Copy, InsertPos);
4227 }
4228}
4229
4230/// Update the scheduler's state after scheduling a node. This is the same node
4231/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
4232/// update its state based on the current cycle before MachineSchedStrategy
4233/// does.
4234///
4235/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
4236/// them here. See comments in biasPhysReg.
4237void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4238 if (IsTopNode) {
4239 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4241 LLVM_DEBUG({
4243 ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
4244 dbgs() << " Top Cluster: ";
4245 for (auto *N : *TopCluster)
4246 dbgs() << N->NodeNum << '\t';
4247 dbgs() << '\n';
4248 }
4249 });
4250 Top.bumpNode(SU);
4251 if (SU->hasPhysRegUses)
4252 reschedulePhysReg(SU, true);
4253 } else {
4254 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4256 LLVM_DEBUG({
4258 ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
4259 dbgs() << " Bot Cluster: ";
4260 for (auto *N : *BotCluster)
4261 dbgs() << N->NodeNum << '\t';
4262 dbgs() << '\n';
4263 }
4264 });
4265 Bot.bumpNode(SU);
4266 if (SU->hasPhysRegDefs)
4267 reschedulePhysReg(SU, false);
4268 }
4269}
4270
4274
4275static MachineSchedRegistry
4276GenericSchedRegistry("converge", "Standard converging scheduler.",
4278
4279//===----------------------------------------------------------------------===//
4280// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
4281//===----------------------------------------------------------------------===//
4282
4283void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
4284  DAG = Dag;
4285 SchedModel = DAG->getSchedModel();
4286 TRI = DAG->TRI;
4287
4288 Rem.init(DAG, SchedModel);
4289 Top.init(DAG, SchedModel, &Rem);
4290 Bot.init(DAG, SchedModel, &Rem);
4291
4292 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
4293 // or are disabled, then these HazardRecs will be disabled.
4294 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
4295 if (!Top.HazardRec) {
4296 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4297 }
4298 if (!Bot.HazardRec) {
4299 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4300 }
4303}
4304
4305void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
4306                                      MachineBasicBlock::iterator End,
4307                                      unsigned NumRegionInstrs) {
4308 const MachineFunction &MF = *Begin->getMF();
4309
4310 // Default to top-down because it was implemented first and existing targets
4311 // expect that behavior by default.
4312 RegionPolicy.OnlyTopDown = true;
4313 RegionPolicy.OnlyBottomUp = false;
4314
4315 // Allow the subtarget to override default policy.
4316 SchedRegion Region(Begin, End, NumRegionInstrs);
4318
4319 // After subtarget overrides, apply command line options.
4320  if (PostRADirection == MISched::TopDown) {
4321    RegionPolicy.OnlyTopDown = true;
4322 RegionPolicy.OnlyBottomUp = false;
4323 } else if (PostRADirection == MISched::BottomUp) {
4324 RegionPolicy.OnlyTopDown = false;
4325 RegionPolicy.OnlyBottomUp = true;
4326  } else if (PostRADirection == MISched::Bidirectional) {
4327    RegionPolicy.OnlyBottomUp = false;
4328 RegionPolicy.OnlyTopDown = false;
4329 }
4330
4331 BotIdx = NumRegionInstrs - 1;
4332 this->NumRegionInstrs = NumRegionInstrs;
4333}
4334
4335void PostGenericScheduler::registerRoots() {
4336  Rem.CriticalPath = DAG->ExitSU.getDepth();
4337
4338 // Some roots may not feed into ExitSU. Check all of them in case.
4339 for (const SUnit *SU : Bot.Available) {
4340 if (SU->getDepth() > Rem.CriticalPath)
4341 Rem.CriticalPath = SU->getDepth();
4342 }
4343 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
4344  if (DumpCriticalPathLength) {
4345    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
4346 }
4347}
4348
4349/// Apply a set of heuristics to a new candidate for PostRA scheduling.
4350///
4351/// \param Cand provides the policy and current best candidate.
4352/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
4353/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
4354bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
4355                                        SchedCandidate &TryCand) {
4356 // Initialize the candidate if needed.
4357 if (!Cand.isValid()) {
4358 TryCand.Reason = FirstValid;
4359 return true;
4360 }
4361
4362 // Prioritize instructions that read unbuffered resources by stall cycles.
4363 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
4364 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4365 return TryCand.Reason != NoCand;
4366
4367 // Keep clustered nodes together.
4368 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
4369 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
4370 bool CandIsClusterSucc =
4371 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
4372 bool TryCandIsClusterSucc =
4373 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
4374
4375 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
4376 Cluster))
4377 return TryCand.Reason != NoCand;
4378  // Avoid critical resource consumption and balance the schedule.
4379  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
4380              TryCand, Cand, ResourceReduce))
4381    return TryCand.Reason != NoCand;
4382  if (tryGreater(TryCand.ResDelta.DemandedResources,
4383                 Cand.ResDelta.DemandedResources,
4384                 TryCand, Cand, ResourceDemand))
4385    return TryCand.Reason != NoCand;
4386
4387 // We only compare a subset of features when comparing nodes between
4388 // Top and Bottom boundary.
4389 if (Cand.AtTop == TryCand.AtTop) {
4390 // Avoid serializing long latency dependence chains.
4391 if (Cand.Policy.ReduceLatency &&
4392 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4393 return TryCand.Reason != NoCand;
4394 }
4395
4396 // Fall through to original instruction order.
4397 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
4398 TryCand.Reason = NodeOrder;
4399 return true;
4400 }
4401
4402 return false;
4403}
4404
4405void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
4406                                             SchedCandidate &Cand) {
4407 ReadyQueue &Q = Zone.Available;
4408 for (SUnit *SU : Q) {
4409 SchedCandidate TryCand(Cand.Policy);
4410 TryCand.SU = SU;
4411 TryCand.AtTop = Zone.isTop();
4412    TryCand.initResourceDelta(DAG, SchedModel);
4413    if (tryCandidate(Cand, TryCand)) {
4414      Cand.setBest(TryCand);
4415      LLVM_DEBUG(traceCandidate(Cand));
4416    }
4417 }
4418}
4419
4420/// Pick the best candidate node from either the top or bottom queue.
4421SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4422  // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4423 // out common parts.
4424
4425 // Schedule as far as possible in the direction of no choice. This is most
4426 // efficient, but also provides the best heuristics for CriticalPSets.
4427 if (SUnit *SU = Bot.pickOnlyChoice()) {
4428 IsTopNode = false;
4429 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4430 return SU;
4431 }
4432 if (SUnit *SU = Top.pickOnlyChoice()) {
4433 IsTopNode = true;
4434 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4435 return SU;
4436 }
4437 // Set the bottom-up policy based on the state of the current bottom zone and
4438 // the instructions outside the zone, including the top zone.
4439 CandPolicy BotPolicy;
4440 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4441 // Set the top-down policy based on the state of the current top zone and
4442 // the instructions outside the zone, including the bottom zone.
4443 CandPolicy TopPolicy;
4444 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4445
4446 // See if BotCand is still valid (because we previously scheduled from Top).
4447 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4448 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4449 BotCand.Policy != BotPolicy) {
4450 BotCand.reset(CandPolicy());
4452 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4453 } else {
4455#ifndef NDEBUG
4456 if (VerifyScheduling) {
4457 SchedCandidate TCand;
4458 TCand.reset(CandPolicy());
4460 assert(TCand.SU == BotCand.SU &&
4461 "Last pick result should correspond to re-picking right now");
4462 }
4463#endif
4464 }
4465
4466 // Check if the top Q has a better candidate.
4467 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4468 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4469 TopCand.Policy != TopPolicy) {
4470 TopCand.reset(CandPolicy());
4472 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4473 } else {
4475#ifndef NDEBUG
4476 if (VerifyScheduling) {
4477 SchedCandidate TCand;
4478 TCand.reset(CandPolicy());
4480 assert(TCand.SU == TopCand.SU &&
4481 "Last pick result should correspond to re-picking right now");
4482 }
4483#endif
4484 }
4485
4486 // Pick best from BotCand and TopCand.
4487 assert(BotCand.isValid());
4488 assert(TopCand.isValid());
4489 SchedCandidate Cand = BotCand;
4490 TopCand.Reason = NoCand;
4491 if (tryCandidate(Cand, TopCand)) {
4492 Cand.setBest(TopCand);
4494 }
4495
4496 IsTopNode = Cand.AtTop;
4497 tracePick(Cand, /*IsPostRA=*/true);
4498 return Cand.SU;
4499}
4500
4501/// Pick the next node to schedule.
4502SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4503  if (DAG->top() == DAG->bottom()) {
4504 assert(Top.Available.empty() && Top.Pending.empty() &&
4505 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4506 return nullptr;
4507 }
4508 SUnit *SU;
4509 if (RegionPolicy.OnlyBottomUp) {
4510 SU = Bot.pickOnlyChoice();
4511 if (SU) {
4512 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4513 } else {
4514 CandPolicy NoPolicy;
4515 BotCand.reset(NoPolicy);
4516 // Set the bottom-up policy based on the state of the current bottom
4517 // zone and the instructions outside the zone, including the top zone.
4518 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4520 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4521 tracePick(BotCand, /*IsPostRA=*/true);
4522 SU = BotCand.SU;
4523 }
4524 IsTopNode = false;
4525 } else if (RegionPolicy.OnlyTopDown) {
4526 SU = Top.pickOnlyChoice();
4527 if (SU) {
4528 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4529 } else {
4530 CandPolicy NoPolicy;
4531 TopCand.reset(NoPolicy);
4532 // Set the top-down policy based on the state of the current top zone
4533 // and the instructions outside the zone, including the bottom zone.
4534 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4536 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4537 tracePick(TopCand, /*IsPostRA=*/true);
4538 SU = TopCand.SU;
4539 }
4540 IsTopNode = true;
4541 } else {
4542 SU = pickNodeBidirectional(IsTopNode);
4543 }
4544 assert(!SU->isScheduled && "SUnit scheduled twice.");
4545
4546 if (SU->isTopReady())
4547 Top.removeReady(SU);
4548 if (SU->isBottomReady())
4549 Bot.removeReady(SU);
4550
4551 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4552 << *SU->getInstr());
4553
4554 if (IsTopNode) {
4555 if (SU->NodeNum == TopIdx++)
4556 ++NumInstrsInSourceOrderPostRA;
4557 } else {
4558 assert(BotIdx < NumRegionInstrs && "out of bounds");
4559 if (SU->NodeNum == BotIdx--)
4560 ++NumInstrsInSourceOrderPostRA;
4561 }
4562
4563 NumInstrsScheduledPostRA += 1;
4564
4565 return SU;
4566}
4567
4568/// Called after ScheduleDAGMI has scheduled an instruction and updated
4569/// scheduled/remaining flags in the DAG nodes.
4570void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4571 if (IsTopNode) {
4572 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4574 Top.bumpNode(SU);
4575 } else {
4576 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4578 Bot.bumpNode(SU);
4579 }
4580}
4581
4582//===----------------------------------------------------------------------===//
4583// ILP Scheduler. Currently for experimental analysis of heuristics.
4584//===----------------------------------------------------------------------===//
4585
4586namespace {
4587
4588/// Order nodes by the ILP metric.
4589struct ILPOrder {
4590 const SchedDFSResult *DFSResult = nullptr;
4591 const BitVector *ScheduledTrees = nullptr;
4592 bool MaximizeILP;
4593
4594 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4595
4596 /// Apply a less-than relation on node priority.
4597 ///
4598 /// (Return true if A comes after B in the Q.)
4599 bool operator()(const SUnit *A, const SUnit *B) const {
4600 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4601 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4602 if (SchedTreeA != SchedTreeB) {
4603 // Unscheduled trees have lower priority.
4604 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4605 return ScheduledTrees->test(SchedTreeB);
4606
4607 // Trees with shallower connections have lower priority.
4608 if (DFSResult->getSubtreeLevel(SchedTreeA)
4609 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4610 return DFSResult->getSubtreeLevel(SchedTreeA)
4611 < DFSResult->getSubtreeLevel(SchedTreeB);
4612 }
4613 }
4614 if (MaximizeILP)
4615 return DFSResult->getILP(A) < DFSResult->getILP(B);
4616 else
4617 return DFSResult->getILP(A) > DFSResult->getILP(B);
4618 }
4619};
4620
4621/// Schedule based on the ILP metric.
4622class ILPScheduler : public MachineSchedStrategy {
4623 ScheduleDAGMILive *DAG = nullptr;
4624 ILPOrder Cmp;
4625
4626 std::vector<SUnit*> ReadyQ;
4627
4628public:
4629 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4630
4631 void initialize(ScheduleDAGMI *dag) override {
4632 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4633 DAG = static_cast<ScheduleDAGMILive*>(dag);
4634 DAG->computeDFSResult();
4635 Cmp.DFSResult = DAG->getDFSResult();
4636 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4637 ReadyQ.clear();
4638 }
4639
4640 void registerRoots() override {
4641 // Restore the heap in ReadyQ with the updated DFS results.
4642 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4643 }
4644
4645 /// Implement MachineSchedStrategy interface.
4646 /// -----------------------------------------
4647
4648 /// Callback to select the highest priority node from the ready Q.
4649 SUnit *pickNode(bool &IsTopNode) override {
4650 if (ReadyQ.empty()) return nullptr;
4651 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4652 SUnit *SU = ReadyQ.back();
4653 ReadyQ.pop_back();
4654 IsTopNode = false;
4655 LLVM_DEBUG(dbgs() << "Pick node "
4656 << "SU(" << SU->NodeNum << ") "
4657 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4658 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4659 << " @"
4660 << DAG->getDFSResult()->getSubtreeLevel(
4661 DAG->getDFSResult()->getSubtreeID(SU))
4662 << '\n'
4663 << "Scheduling " << *SU->getInstr());
4664 return SU;
4665 }
4666
4667 /// Scheduler callback to notify that a new subtree is scheduled.
4668 void scheduleTree(unsigned SubtreeID) override {
4669 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4670 }
4671
4672 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4673 /// DFSResults, and resort the priority Q.
4674 void schedNode(SUnit *SU, bool IsTopNode) override {
4675 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4676 }
4677
4678 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4679
4680 void releaseBottomNode(SUnit *SU) override {
4681 ReadyQ.push_back(SU);
4682 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4683 }
4684};
4685
4686} // end anonymous namespace
4687
4688static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4689  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4690}
4691static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4692  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4693}
4694
4695static MachineSchedRegistry ILPMaxRegistry(
4696    "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4697static MachineSchedRegistry ILPMinRegistry(
4698    "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4699
4700//===----------------------------------------------------------------------===//
4701// Machine Instruction Shuffler for Correctness Testing
4702//===----------------------------------------------------------------------===//
4703
4704#ifndef NDEBUG
4705namespace {
4706
4707/// Apply a less-than relation on the node order, which corresponds to the
4708/// instruction order prior to scheduling. IsReverse implements greater-than.
4709template<bool IsReverse>
4710struct SUnitOrder {
4711 bool operator()(SUnit *A, SUnit *B) const {
4712 if (IsReverse)
4713 return A->NodeNum > B->NodeNum;
4714 else
4715 return A->NodeNum < B->NodeNum;
4716 }
4717};
4718
4719/// Reorder instructions as much as possible.
4720class InstructionShuffler : public MachineSchedStrategy {
4721 bool IsAlternating;
4722 bool IsTopDown;
4723
4724 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4725 // gives nodes with a higher number higher priority causing the latest
4726 // instructions to be scheduled first.
4727 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4728 TopQ;
4729
4730 // When scheduling bottom-up, use greater-than as the queue priority.
4731 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4732 BottomQ;
4733
4734public:
4735 InstructionShuffler(bool alternate, bool topdown)
4736 : IsAlternating(alternate), IsTopDown(topdown) {}
4737
4738 void initialize(ScheduleDAGMI*) override {
4739 TopQ.clear();
4740 BottomQ.clear();
4741 }
4742
4743 /// Implement MachineSchedStrategy interface.
4744 /// -----------------------------------------
4745
4746 SUnit *pickNode(bool &IsTopNode) override {
4747 SUnit *SU;
4748 if (IsTopDown) {
4749 do {
4750 if (TopQ.empty()) return nullptr;
4751 SU = TopQ.top();
4752 TopQ.pop();
4753 } while (SU->isScheduled);
4754 IsTopNode = true;
4755 } else {
4756 do {
4757 if (BottomQ.empty()) return nullptr;
4758 SU = BottomQ.top();
4759 BottomQ.pop();
4760 } while (SU->isScheduled);
4761 IsTopNode = false;
4762 }
4763 if (IsAlternating)
4764 IsTopDown = !IsTopDown;
4765 return SU;
4766 }
4767
4768 void schedNode(SUnit *SU, bool IsTopNode) override {}
4769
4770 void releaseTopNode(SUnit *SU) override {
4771 TopQ.push(SU);
4772 }
4773 void releaseBottomNode(SUnit *SU) override {
4774 BottomQ.push(SU);
4775 }
4776};
4777
4778} // end anonymous namespace
4779
4780static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4781  bool Alternate =
4782      PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp;
4783  bool TopDown = PreRADirection != MISched::BottomUp;
4784 return new ScheduleDAGMILive(
4785 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4786}
4787
4788static MachineSchedRegistry ShufflerRegistry(
4789    "shuffle", "Shuffle machine instructions alternating directions",
4790    createInstructionShuffler);
4791#endif // !NDEBUG
4792
4793//===----------------------------------------------------------------------===//
4794// GraphWriter support for ScheduleDAGMILive.
4795//===----------------------------------------------------------------------===//
4796
4797#ifndef NDEBUG
4798
4799template <>
4800struct GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> {};
4801
4802
4803template <>
4804struct DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits {
4805  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4806
4807 static std::string getGraphName(const ScheduleDAG *G) {
4808 return std::string(G->MF.getName());
4809 }
4810
4811  static bool renderGraphFromBottomUp() {
4812    return true;
4813 }
4814
4815 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4816 if (ViewMISchedCutoff == 0)
4817 return false;
4818 return (Node->Preds.size() > ViewMISchedCutoff
4819 || Node->Succs.size() > ViewMISchedCutoff);
4820 }
4821
4822 /// If you want to override the dot attributes printed for a particular
4823 /// edge, override this method.
4824 static std::string getEdgeAttributes(const SUnit *Node,
4825 SUnitIterator EI,
4826 const ScheduleDAG *Graph) {
4827 if (EI.isArtificialDep())
4828 return "color=cyan,style=dashed";
4829 if (EI.isCtrlDep())
4830 return "color=blue,style=dashed";
4831 return "";
4832 }
4833
4834 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4835 std::string Str;
4836 raw_string_ostream SS(Str);
4837 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4838 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4839 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4840 SS << "SU:" << SU->NodeNum;
4841 if (DFS)
4842 SS << " I:" << DFS->getNumInstrs(SU);
4843 return Str;
4844 }
4845
4846 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4847 return G->getGraphNodeLabel(SU);
4848 }
4849
4850 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4851 std::string Str("shape=Mrecord");
4852 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4853 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4854 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4855 if (DFS) {
4856 Str += ",style=filled,fillcolor=\"#";
4857 Str += DOT::getColorString(DFS->getSubtreeID(N));
4858 Str += '"';
4859 }
4860 return Str;
4861 }
4862};
4863
4864#endif // NDEBUG
4865
4866/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4867/// rendered using 'dot'.
4868void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4869#ifndef NDEBUG
4870 ViewGraph(this, Name, false, Title);
4871#else
4872 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4873 << "systems with Graphviz or gv!\n";
4874#endif // NDEBUG
4875}
4876
4877/// Out-of-line implementation with no arguments is handy for gdb.
4878void ScheduleDAGMI::viewGraph() {
4879  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4880}
4881
4882/// Sort predicate for the intervals stored in an instance of
4883/// ResourceSegments. Intervals are always disjoint (no intersection
4884/// for any pairs of intervals), therefore we can sort the totality of
4885/// the intervals by looking only at the left boundary.
4886static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4887                          const ResourceSegments::IntervalTy &B) {
4888  return A.first < B.first;
4889}
4890
4891unsigned ResourceSegments::getFirstAvailableAt(
4892 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4893 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4894 IntervalBuilder) const {
4895 assert(llvm::is_sorted(_Intervals, sortIntervals) &&
4896 "Cannot execute on an un-sorted set of intervals.");
4897
4898 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4899 // a ResourceSegment interval for that situation.
4900 if (AcquireAtCycle == ReleaseAtCycle)
4901 return CurrCycle;
4902
4903 unsigned RetCycle = CurrCycle;
4904 ResourceSegments::IntervalTy NewInterval =
4905 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4906 for (auto &Interval : _Intervals) {
4907 if (!intersects(NewInterval, Interval))
4908 continue;
4909
4910 // Move the interval right next to the top of the one it
4911 // intersects.
4912 assert(Interval.second > NewInterval.first &&
4913 "Invalid intervals configuration.");
4914 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4915 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4916 }
4917 return RetCycle;
4918}
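// Illustrative example with hypothetical bookings: if _Intervals already holds
// [2, 5) and [7, 9) and the IntervalBuilder yields the one-cycle interval [3, 4)
// for CurrCycle = 3, the candidate collides with [2, 5) and is moved to cycle 5,
// giving [5, 6), which is free, so getFirstAvailableAt() returns 5. For
// CurrCycle = 6 the candidate [6, 7) does not intersect anything and 6 is
// returned unchanged.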
4919
4920void ResourceSegments::add(ResourceSegments::IntervalTy A,
4921                           const unsigned CutOff) {
4922 assert(A.first <= A.second && "Cannot add negative resource usage");
4923 assert(CutOff > 0 && "0-size interval history has no use.");
4924 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4925 // instruction needed the resource to be available but does not use it.
4926 // However, ResourceSegment represents an interval that is closed on the left
4927 // and open on the right. It is impossible to represent an empty interval when
4928 // the left is closed. Do not add it to Intervals.
4929 if (A.first == A.second)
4930 return;
4931
4932 assert(all_of(_Intervals,
4933 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4934 return !intersects(A, Interval);
4935 }) &&
4936 "A resource is being overwritten");
4937 _Intervals.push_back(A);
4938
4939 sortAndMerge();
4940
4941 // Do not keep the full history of the intervals, just the
4942 // latest #CutOff.
4943 while (_Intervals.size() > CutOff)
4944 _Intervals.pop_front();
4945}
4946
4947bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4948                                  ResourceSegments::IntervalTy B) {
4949  assert(A.first <= A.second && "Invalid interval");
4950 assert(B.first <= B.second && "Invalid interval");
4951
4952 // Share one boundary.
4953 if ((A.first == B.first) || (A.second == B.second))
4954 return true;
4955
4956  // full intersect: [ *** ) B
4957 // [***) A
4958 if ((A.first > B.first) && (A.second < B.second))
4959 return true;
4960
4961 // right intersect: [ ***) B
4962 // [*** ) A
4963 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4964 return true;
4965
4966 // left intersect: [*** ) B
4967 // [ ***) A
4968 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4969 return true;
4970
4971 return false;
4972}
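// Illustrative example: [2, 5) and [4, 8) overlap through the "left intersect"
// case (2 < 4 and 4 < 5), while [2, 5) and [5, 8) do not intersect because the
// intervals are closed on the left and open on the right, so cycle 5 belongs only
// to the second interval. Sharing a left boundary, as in [2, 5) and [2, 3),
// always counts as an intersection.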
4973
4974void ResourceSegments::sortAndMerge() {
4975 if (_Intervals.size() <= 1)
4976 return;
4977
4978 // First sort the collection.
4979 _Intervals.sort(sortIntervals);
4980
4981 // can use next because I have at least 2 elements in the list
4982 auto next = std::next(std::begin(_Intervals));
4983 auto E = std::end(_Intervals);
4984 for (; next != E; ++next) {
4985 if (std::prev(next)->second >= next->first) {
4986 next->first = std::prev(next)->first;
4987 _Intervals.erase(std::prev(next));
4988 continue;
4989 }
4990 }
4991}
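// Illustrative example: starting from {[7, 9), [2, 4), [3, 6)}, sorting yields
// {[2, 4), [3, 6), [7, 9)}; because 4 >= 3 the first two are merged into [2, 6),
// while [7, 9) stays separate, leaving {[2, 6), [7, 9)}.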
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
Function Alias Analysis false
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA=false)
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
SmallVector< SchedRegion, 16 > MBBRegionsVector
static cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PriorityQueue class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
reverse_iterator rend() const
Definition ArrayRef.h:139
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
reverse_iterator rbegin() const
Definition ArrayRef.h:138
bool test(unsigned Idx) const
Definition BitVector.h:480
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:163
iterator end()
Definition DenseMap.h:81
Register getReg() const
ECValue - The EquivalenceClasses data structure is just a set of these.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
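Illustrative use of the EquivalenceClasses API listed above, assuming llvm/ADT/EquivalenceClasses.h is included; the keys are arbitrary:
EquivalenceClasses<unsigned> EC;
EC.unionSets(1, 2);                 // {1, 2}
EC.unionSets(2, 3);                 // {1, 2, 3}
bool Same = EC.isEquivalent(1, 3);  // true: 1 and 3 now share a leader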
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Segments::iterator iterator
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
iterator begin()
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endIndex - return the maximum point of the range of the whole, exclusive.
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
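A sketch of the liveness query pattern built from these APIs; LIS, MI and a virtual register Reg are assumed to be in scope:
SlotIndex Idx = LIS->getInstructionIndex(*MI);
LiveInterval &LI = LIS->getInterval(Reg);
LiveQueryResult LRQ = LI.Query(Idx);
if (VNInfo *VNI = LRQ.valueIn()) {
  // Reg is live into MI; VNI->def is the slot of its defining instruction.
  MachineInstr *DefMI = LIS->getInstructionFromIndex(VNI->def);
  (void)DefMI;
}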
bool hasValue() const
static LocationSize precise(uint64_t Value)
MachineInstrBundleIterator< const MachineInstr > const_iterator
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
Representation of each machine instruction.
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static LLVM_ABI MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
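A hedged sketch of how a factory is registered so it becomes selectable with -misched=<name>; the factory and registry names below are hypothetical:
static ScheduleDAGInstrs *createIllustrativeSched(MachineSchedContext *C) {
  // Reuse the standard converging scheduler; a target would typically
  // plug in its own strategy or DAG mutations here.
  return createSchedLive(C);
}
static MachineSchedRegistry
    IllustrativeSchedRegistry("illustrative", "Illustrative scheduler factory.",
                              createIllustrativeSched);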
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI MachineSchedulerPass(const TargetMachine *TM)
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
SchedCandidate BotCand
Candidate last picked from Bot boundary.
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SchedCandidate TopCand
Candidate last picked from Top boundary.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
LLVM_ABI void dump(const TargetRegisterInfo &TRI) const
LLVM_ABI void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
void clear()
clear - Erase all elements from the queue.
Helpers for implementing custom MachineSchedStrategy classes.
ArrayRef< SUnit * > elements()
LLVM_ABI void dump() const
std::vector< SUnit * >::iterator iterator
StringRef getName() const
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
LLVM_ABI void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
LLVM_ABI void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
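A sketch of the collect/adjust sequence these members describe; MI, TRI, MRI and LIS are assumed to be in scope:
RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, MRI, /*TrackLaneMasks=*/false, /*IgnoreDead=*/false);
// Fold in liveness so defs that are actually dead end up in DeadDefs.
RegOpers.detectDeadDefs(*MI, *LIS);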
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
LLVM_ABI void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static LLVM_ABI bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
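Illustrative use of the interval helpers; with left-closed, right-open intervals, [2,5) and [4,7) overlap at cycle 4:
ResourceSegments::IntervalTy A{2, 5};
ResourceSegments::IntervalTy B{4, 7};
bool Overlap = ResourceSegments::intersects(A, B); // true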
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
bool isWeak() const
Tests if this a weak dependence.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition ScheduleDAG.h:76
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
@ Weak
Arbitrary weak DAG edge.
Definition ScheduleDAG.h:75
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Register getReg() const
Returns the register associated with this edge.
bool isArtificialDep() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned NumSuccsLeft
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned NumPredsLeft
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
unsigned WeakPredsLeft
bool isBottomReady() const
bool hasPhysRegUses
Has physreg uses.
bool isTopReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
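A sketch of walking a node's incoming edges with the SDep and SUnit accessors above; SU is assumed to be a node of the DAG being scheduled:
for (const SDep &Pred : SU->Preds) {
  SUnit *PredSU = Pred.getSUnit();
  if (Pred.getKind() == SDep::Data) {
    // True data dependence: at least getLatency() cycles should separate
    // PredSU from SU, and getReg() names the register carrying the value.
    unsigned Lat = Pred.getLatency();
    (void)Lat;
  } else if (Pred.isWeak()) {
    // Weak edges (e.g. SDep::Cluster) are hints; the scheduler may drop them.
  }
  (void)PredSU;
}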
Each Scheduling boundary is associated with ready queues.
LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
LLVM_ABI void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
LLVM_ABI void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
LLVM_ABI void dumpScheduledState() const
LLVM_ABI void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
Definition ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
A ScheduleDAG for scheduling lists of MachineInstr.
SmallVector< ClusterInfo > & getClusters()
Returns the array of the clusters.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
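A minimal DAG-mutation sketch that adds a weak cluster edge between two nodes; MyClusterMutation and the choice of SUnits are purely illustrative:
struct MyClusterMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    SUnit *SUa = &DAG->SUnits[0];
    SUnit *SUb = &DAG->SUnits[1];
    // Only add the edge if it cannot create a cycle.
    if (DAG->canAddEdge(SUb, SUa))
      DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  }
};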
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph(const Twine &Name, const Twine &Title) override
viewGraph - Pop up a ghostview window with the reachable parts of the DAG rendered using 'dot'.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
std::reverse_iterator< const_iterator > const_reverse_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
const MCWriteProcResEntry * ProcResIter
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
LLVM_ABI unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic scheduling policy within a region.
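A hedged sketch of a subtarget override of the generic policy; MySubtarget is hypothetical and the chosen settings are only an example:
void MySubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      const SchedRegion &Region) const {
  // Force top-down scheduling for every region and keep register
  // pressure tracking enabled.
  Policy.OnlyTopDown = true;
  Policy.OnlyBottomUp = false;
  Policy.ShouldTrackPressure = true;
  (void)Region;
}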
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
A raw_ostream that writes to an std::string.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
ScheduleDAGMILive * createSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
void stable_sort(R &&Range)
Definition STLExtras.h:2058
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition Format.h:157
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition LaneBitmask.h:92
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
LLVM_ABI char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
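A sketch of the new pass manager run() pattern these entries describe; MyMachinePass and runImpl are hypothetical:
PreservedAnalyses MyMachinePass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &MFAM) {
  bool Changed = runImpl(MF); // hypothetical worker
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}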
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
ScheduleDAGMI * createSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
cl::opt< bool > ViewMISchedDAGs
LLVM_ABI Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI cl::opt< bool > VerifyScheduling
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1920
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr unsigned InvalidClusterId
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition Format.h:150
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
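A hedged sketch of the typical way a target installs these mutations from its pass configuration; MyPassConfig is hypothetical, while the helper functions are the ones declared above:
ScheduleDAGInstrs *
MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  ScheduleDAGMILive *DAG = createSchedLive(C);
  // Cluster adjacent loads/stores so they can later be fused or paired.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}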
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
SmallPtrSet< SUnit *, 8 > ClusterInfo
Keep record of which SUnit are in the same cluster group.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &)
LLVM_ABI void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI cl::opt< MISched::Direction > PreRADirection
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
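These printing helpers combine into the usual debug-output idiom (debug build, -debug-only=machine-scheduler); MBB, Reg and TRI are assumed to be in scope:
LLVM_DEBUG(dbgs() << "Scheduling " << printMBBReference(*MBB)
                  << ", pressure on " << printReg(Reg, TRI) << '\n');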
LLVM_ABI void initializePostMachineSchedulerLegacyPass(PassRegistry &)
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
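A hedged sketch of how the try* helpers and biasPhysReg compose inside a custom tryCandidate override; MyStrategy is hypothetical and mirrors the generic pattern rather than reproducing it:
bool MyStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                              SchedBoundary *Zone) const {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }
  // Prefer the candidate that shortens physical register live ranges.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;
  // Otherwise fall back to latency within the current zone.
  if (Zone && tryLatency(TryCand, Cand, *Zone))
    return TryCand.Reason != NoCand;
  return false;
}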
cl::opt< bool > PrintDAGs
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
const MachineLoopInfo * MLI
const TargetMachine * TM
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
A region of an MBB for scheduling.
Summarize the unscheduled region.
LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.