MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
51#include "llvm/Config/llvm-config.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
57#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
81 "misched-prera-direction", cl::Hidden,
82 cl::desc("Pre reg-alloc list scheduling direction"),
86 "Force top-down pre reg-alloc list scheduling"),
87 clEnumValN(MISched::BottomUp, "bottomup",
88 "Force bottom-up pre reg-alloc list scheduling"),
89 clEnumValN(MISched::Bidirectional, "bidirectional",
90 "Force bidirectional pre reg-alloc list scheduling")));
91
93 "misched-postra-direction", cl::Hidden,
94 cl::desc("Post reg-alloc list scheduling direction"),
98 "Force top-down post reg-alloc list scheduling"),
99 clEnumValN(MISched::BottomUp, "bottomup",
100 "Force bottom-up post reg-alloc list scheduling"),
101 clEnumValN(MISched::Bidirectional, "bidirectional",
102 "Force bidirectional post reg-alloc list scheduling")));
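// Illustrative usage (not part of this file): these hidden flags can be used
// to force a scheduling direction when experimenting, e.g. from llc:
//
//   llc -misched-prera-direction=bottomup -misched-postra-direction=topdown foo.ll
//
// When neither flag is given, the direction chosen by the scheduling
// strategy's per-region policy is used.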
103
106 cl::desc("Print critical path length to stdout"));
107
109 "verify-misched", cl::Hidden,
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
114 "view-misched-dags", cl::Hidden,
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
116cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117 cl::desc("Print schedule DAGs"));
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
120 cl::desc("Dump resource usage at schedule boundary."));
122 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
 123 cl::desc("Show details of invoking getNextResourceCycle."));
124#else
125const bool ViewMISchedDAGs = false;
126const bool PrintDAGs = false;
127const bool MischedDetailResourceBooking = false;
128#ifdef LLVM_ENABLE_DUMP
129const bool MISchedDumpReservedCycles = false;
130#endif // LLVM_ENABLE_DUMP
131#endif // NDEBUG
132
133} // end namespace llvm
134
135#ifndef NDEBUG
136/// In some situations a few uninteresting nodes depend on nearly all other
137/// nodes in the graph; provide a cutoff to hide them.
138static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
139 cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
140
142 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
143
145 cl::desc("Only schedule this function"));
146static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147 cl::desc("Only schedule this MBB#"));
148#endif // NDEBUG
149
150/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151/// size of the ready lists.
153 cl::desc("Limit ready list to N instructions"), cl::init(256));
154
155static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156 cl::desc("Enable register pressure scheduling."), cl::init(true));
157
158static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
160
162 cl::desc("Enable memop clustering."),
163 cl::init(true));
164static cl::opt<bool>
165 ForceFastCluster("force-fast-cluster", cl::Hidden,
 166 cl::desc("Switch to fast cluster algorithm with the loss "
 167 "of some fusion opportunities"),
168 cl::init(false));
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171 cl::desc("The threshold for fast cluster"),
172 cl::init(1000));
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
177 cl::desc("Dump resource usage at schedule boundary."));
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
182 cl::init(19));
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185 cl::desc("Set width of the columns showing resource booking."),
186 cl::init(5));
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194 cl::desc("Number of intervals to track"), cl::init(10));
195
196// DAG subtrees must have at least this many nodes.
197static const unsigned MinSubtreeSize = 8;
198
199// Pin the vtables to this file.
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204//===----------------------------------------------------------------------===//
205// Machine Instruction Scheduling Pass and Registry
206//===----------------------------------------------------------------------===//
207
210}
211
213 delete RegClassInfo;
214}
215
216namespace {
217
218/// Base class for a machine scheduler class that can run at any point.
219class MachineSchedulerBase : public MachineSchedContext,
220 public MachineFunctionPass {
221public:
222 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
223
224 void print(raw_ostream &O, const Module* = nullptr) const override;
225
226protected:
227 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228};
229
230/// MachineScheduler runs after coalescing and before register allocation.
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237 bool runOnMachineFunction(MachineFunction&) override;
238
239 static char ID; // Class identification, replacement for typeinfo
240
241protected:
242 ScheduleDAGInstrs *createMachineScheduler();
243};
244
245/// PostMachineScheduler runs shortly before code emission.
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252 bool runOnMachineFunction(MachineFunction&) override;
253
254 static char ID; // Class identification, replacement for typeinfo
255
256protected:
257 ScheduleDAGInstrs *createPostMachineScheduler();
258};
259
260} // end anonymous namespace
261
262char MachineScheduler::ID = 0;
263
264char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
267 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281 AU.setPreservesCFG();
291}
292
293char PostMachineScheduler::ID = 0;
294
295char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298 "PostRA Machine Instruction Scheduler", false, false)
302INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310 AU.setPreservesCFG();
316}
317
320
321/// A dummy default scheduler factory indicates whether the scheduler
322/// is overridden on the command line.
324 return nullptr;
325}
326
327/// MachineSchedOpt allows command line selection of the scheduler.
332 cl::desc("Machine instruction scheduler to use"));
333
335DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
337
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
341 cl::Hidden);
342
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
346 cl::init(true), cl::Hidden);
347
348/// Decrement this iterator until reaching the top or a non-debug instr.
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
359
360/// Non-const version.
366}
367
368/// If this iterator is a debug value, increment until reaching the End or a
369/// non-debug instruction.
373 for(; I != End; ++I) {
374 if (!I->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380/// Non-const version.
386}
387
388/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390 // Select the scheduler, or set the default.
392 if (Ctor != useDefaultMachineSched)
393 return Ctor(this);
394
395 // Get the default scheduler set by the target for this function.
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
397 if (Scheduler)
398 return Scheduler;
399
400 // Default to GenericScheduler.
401 return createGenericSchedLive(this);
402}
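// Illustrative sketch (hypothetical, not part of this pass): an out-of-tree
// scheduler can be made selectable via -misched=<name> by registering a
// factory with MachineSchedRegistry, the same registry consulted above
// through MachineSchedOpt:
//
//   static ScheduleDAGInstrs *createMyScheduler(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "My experimental scheduler",
//                       createMyScheduler);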
403
404/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405/// the caller. We don't have a command line option to override the postRA
406/// scheduler. The Target must configure it.
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408 // Get the postRA scheduler set by the target for this function.
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
410 if (Scheduler)
411 return Scheduler;
412
413 // Default to GenericScheduler.
414 return createGenericSchedPostRA(this);
415}
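// Illustrative sketch (hypothetical target, not part of this file): a target
// enables its own post-RA scheduling by overriding
// TargetPassConfig::createPostMachineScheduler(), typically returning a
// ScheduleDAGMI much like the createGenericSchedPostRA() default used above:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
//                              /*RemoveKillFlags=*/true);
//   }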
416
417/// Top-level MachineScheduler pass driver.
418///
419/// Visit blocks in function order. Divide each block into scheduling regions
420/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421/// consistent with the DAG builder, which traverses the interior of the
422/// scheduling regions bottom-up.
423///
424/// This design avoids exposing scheduling boundaries to the DAG builder,
425/// simplifying the DAG builder's support for "special" target instructions.
426/// At the same time the design allows target schedulers to operate across
427/// scheduling boundaries, for example to bundle the boundary instructions
428/// without reordering them. This creates complexity, because the target
429/// scheduler must update the RegionBegin and RegionEnd positions cached by
430/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431/// design would be to split blocks at scheduling boundaries, but LLVM has a
432/// general bias against block splitting purely for implementation simplicity.
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434 if (skipFunction(mf.getFunction()))
435 return false;
436
437 if (EnableMachineSched.getNumOccurrences()) {
439 return false;
440 } else if (!mf.getSubtarget().enableMachineScheduler())
441 return false;
442
443 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445 // Initialize the context of the pass.
446 MF = &mf;
447 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
448 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
449 PassConfig = &getAnalysis<TargetPassConfig>();
450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
453
454 if (VerifyScheduling) {
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(this, "Before machine scheduling.", &errs());
457 }
458 RegClassInfo->runOnMachineFunction(*MF);
459
460 // Instantiate the selected scheduler for this target, function, and
461 // optimization level.
462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
463 scheduleRegions(*Scheduler, false);
464
465 LLVM_DEBUG(LIS->dump());
467 MF->verify(this, "After machine scheduling.", &errs());
468 return true;
469}
470
471bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
472 if (skipFunction(mf.getFunction()))
473 return false;
474
475 if (EnablePostRAMachineSched.getNumOccurrences()) {
477 return false;
478 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
479 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
480 return false;
481 }
482 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
483
484 // Initialize the context of the pass.
485 MF = &mf;
486 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
487 PassConfig = &getAnalysis<TargetPassConfig>();
488 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
489
491 MF->verify(this, "Before post machine scheduling.", &errs());
492
493 // Instantiate the selected scheduler for this target, function, and
494 // optimization level.
495 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
496 scheduleRegions(*Scheduler, true);
497
499 MF->verify(this, "After post machine scheduling.", &errs());
500 return true;
501}
502
503/// Return true if the given instruction should not be included in a scheduling
504/// region.
505///
506/// MachineScheduler does not currently support scheduling across calls. To
507/// handle calls, the DAG builder needs to be modified to create register
508/// anti/output dependencies on the registers clobbered by the call's regmask
509/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
510/// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
511/// the boundary, but there would be no benefit to postRA scheduling across
512/// calls this late anyway.
515 MachineFunction *MF,
516 const TargetInstrInfo *TII) {
517 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
518 MI->isFakeUse();
519}
520
521/// A region of an MBB for scheduling.
522namespace {
523struct SchedRegion {
524 /// RegionBegin is the first instruction in the scheduling region, and
525 /// RegionEnd is either MBB->end() or the scheduling boundary after the
526 /// last instruction in the scheduling region. These iterators cannot refer
527 /// to instructions outside of the identified scheduling region because
528 /// those may be reordered before scheduling this region.
529 MachineBasicBlock::iterator RegionBegin;
531 unsigned NumRegionInstrs;
532
534 unsigned N) :
535 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
536};
537} // end anonymous namespace
538
540
541static void
543 MBBRegionsVector &Regions,
544 bool RegionsTopDown) {
547
549 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
550 RegionEnd != MBB->begin(); RegionEnd = I) {
551
552 // Avoid decrementing RegionEnd for blocks with no terminator.
553 if (RegionEnd != MBB->end() ||
554 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
555 --RegionEnd;
556 }
557
558 // The next region starts above the previous region. Look backward in the
559 // instruction stream until we find the nearest boundary.
560 unsigned NumRegionInstrs = 0;
561 I = RegionEnd;
562 for (;I != MBB->begin(); --I) {
563 MachineInstr &MI = *std::prev(I);
564 if (isSchedBoundary(&MI, &*MBB, MF, TII))
565 break;
566 if (!MI.isDebugOrPseudoInstr()) {
567 // MBB::size() uses instr_iterator to count. Here we need a bundle to
568 // count as a single instruction.
569 ++NumRegionInstrs;
570 }
571 }
572
573 // It's possible we found a scheduling region that only has debug
574 // instructions. Don't bother scheduling these.
575 if (NumRegionInstrs != 0)
576 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
577 }
578
579 if (RegionsTopDown)
580 std::reverse(Regions.begin(), Regions.end());
581}
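// For example (illustrative), a block of the form
//
//   %a = LOAD ...
//   %b = ADD %a, ...
//   CALL @fn            <- scheduling boundary (isSchedBoundary)
//   %c = MUL %b, ...
//
// is recorded as two regions, [LOAD, CALL) and [MUL, end()), and the regions
// are then visited bottom-up unless RegionsTopDown reverses the order.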
582
583/// Main driver for both MachineScheduler and PostMachineScheduler.
584void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
585 bool FixKillFlags) {
586 // Visit all machine basic blocks.
587 //
588 // TODO: Visit blocks in global postorder or postorder within the bottom-up
589 // loop tree. Then we can optionally compute global RegPressure.
590 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
591 MBB != MBBEnd; ++MBB) {
592
593 Scheduler.startBlock(&*MBB);
594
595#ifndef NDEBUG
596 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
597 continue;
598 if (SchedOnlyBlock.getNumOccurrences()
599 && (int)SchedOnlyBlock != MBB->getNumber())
600 continue;
601#endif
602
603 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
604 // points to the scheduling boundary at the bottom of the region. The DAG
605 // does not include RegionEnd, but the region does (i.e. the next
606 // RegionEnd is above the previous RegionBegin). If the current block has
607 // no terminator then RegionEnd == MBB->end() for the bottom region.
608 //
 609 // All the regions of the MBB are first found and stored in MBBRegions,
 610 // which are processed (within the MBB) top-down if RegionsTopDown is true.
611 //
612 // The Scheduler may insert instructions during either schedule() or
613 // exitRegion(), even for empty regions. So the local iterators 'I' and
614 // 'RegionEnd' are invalid across these calls. Instructions must not be
615 // added to other regions than the current one without updating MBBRegions.
616
617 MBBRegionsVector MBBRegions;
618 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
619 for (const SchedRegion &R : MBBRegions) {
620 MachineBasicBlock::iterator I = R.RegionBegin;
621 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
622 unsigned NumRegionInstrs = R.NumRegionInstrs;
623
624 // Notify the scheduler of the region, even if we may skip scheduling
625 // it. Perhaps it still needs to be bundled.
626 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
627
628 // Skip empty scheduling regions (0 or 1 schedulable instructions).
629 if (I == RegionEnd || I == std::prev(RegionEnd)) {
630 // Close the current region. Bundle the terminator if needed.
631 // This invalidates 'RegionEnd' and 'I'.
632 Scheduler.exitRegion();
633 continue;
634 }
635 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
636 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
637 << " " << MBB->getName() << "\n From: " << *I
638 << " To: ";
639 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
640 else dbgs() << "End\n";
641 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
643 errs() << MF->getName();
644 errs() << ":%bb. " << MBB->getNumber();
645 errs() << " " << MBB->getName() << " \n";
646 }
647
648 // Schedule a region: possibly reorder instructions.
649 // This invalidates the original region iterators.
650 Scheduler.schedule();
651
652 // Close the current region.
653 Scheduler.exitRegion();
654 }
655 Scheduler.finishBlock();
656 // FIXME: Ideally, no further passes should rely on kill flags. However,
657 // thumb2 size reduction is currently an exception, so the PostMIScheduler
658 // needs to do this.
659 if (FixKillFlags)
660 Scheduler.fixupKills(*MBB);
661 }
662 Scheduler.finalizeSchedule();
663}
664
665void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
666 // unimplemented
667}
668
669#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
671 dbgs() << "Queue " << Name << ": ";
672 for (const SUnit *SU : Queue)
673 dbgs() << SU->NodeNum << " ";
674 dbgs() << "\n";
675}
676#endif
677
678//===----------------------------------------------------------------------===//
679// ScheduleDAGMI - Basic machine instruction scheduling. This is
680// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
681// virtual registers.
682//===----------------------------------------------------------------------===//
683
684// Provide a vtable anchor.
686
687/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
688/// NumPredsLeft reaches zero, release the successor node.
689///
690/// FIXME: Adjust SuccSU height based on MinLatency.
692 SUnit *SuccSU = SuccEdge->getSUnit();
693
694 if (SuccEdge->isWeak()) {
695 --SuccSU->WeakPredsLeft;
696 if (SuccEdge->isCluster())
697 NextClusterSucc = SuccSU;
698 return;
699 }
700#ifndef NDEBUG
701 if (SuccSU->NumPredsLeft == 0) {
702 dbgs() << "*** Scheduling failed! ***\n";
703 dumpNode(*SuccSU);
704 dbgs() << " has been released too many times!\n";
705 llvm_unreachable(nullptr);
706 }
707#endif
708 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
709 // CurrCycle may have advanced since then.
710 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
711 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
712
713 --SuccSU->NumPredsLeft;
714 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
715 SchedImpl->releaseTopNode(SuccSU);
716}
717
718/// releaseSuccessors - Call releaseSucc on each of SU's successors.
720 for (SDep &Succ : SU->Succs)
721 releaseSucc(SU, &Succ);
722}
723
724/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
725/// NumSuccsLeft reaches zero, release the predecessor node.
726///
727/// FIXME: Adjust PredSU height based on MinLatency.
729 SUnit *PredSU = PredEdge->getSUnit();
730
731 if (PredEdge->isWeak()) {
732 --PredSU->WeakSuccsLeft;
733 if (PredEdge->isCluster())
734 NextClusterPred = PredSU;
735 return;
736 }
737#ifndef NDEBUG
738 if (PredSU->NumSuccsLeft == 0) {
739 dbgs() << "*** Scheduling failed! ***\n";
740 dumpNode(*PredSU);
741 dbgs() << " has been released too many times!\n";
742 llvm_unreachable(nullptr);
743 }
744#endif
745 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
746 // CurrCycle may have advanced since then.
747 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
748 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
749
750 --PredSU->NumSuccsLeft;
751 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
752 SchedImpl->releaseBottomNode(PredSU);
753}
754
755/// releasePredecessors - Call releasePred on each of SU's predecessors.
757 for (SDep &Pred : SU->Preds)
758 releasePred(SU, &Pred);
759}
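namespace {
// A minimal sketch (illustrative only, never used by the pass) of the counting
// scheme that releaseSucc/releasePred implement above: every strong edge holds
// a node back until its count of unscheduled predecessors reaches zero.
struct ToyNode {
  unsigned NumPredsLeft = 0;
  std::vector<ToyNode *> Succs;
};
[[maybe_unused]] void toyReleaseSuccessors(ToyNode &Scheduled,
                                           std::vector<ToyNode *> &ReadyQ) {
  for (ToyNode *Succ : Scheduled.Succs)
    if (--Succ->NumPredsLeft == 0)
      ReadyQ.push_back(Succ); // Succ has no unscheduled predecessors left.
}
} // end anonymous namespace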
760
763 SchedImpl->enterMBB(bb);
764}
765
767 SchedImpl->leaveMBB();
769}
770
771/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
772/// after crossing a scheduling boundary. [begin, end) includes all instructions
773/// in the region, including the boundary itself and single-instruction regions
774/// that don't get scheduled.
778 unsigned regioninstrs)
779{
780 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
781
782 SchedImpl->initPolicy(begin, end, regioninstrs);
783
784 // Set dump direction after initializing sched policy.
786 if (SchedImpl->getPolicy().OnlyTopDown)
788 else if (SchedImpl->getPolicy().OnlyBottomUp)
790 else
793}
794
795/// This is normally called from the main scheduler loop but may also be invoked
796/// by the scheduling strategy to perform additional code motion.
799 // Advance RegionBegin if the first instruction moves down.
800 if (&*RegionBegin == MI)
801 ++RegionBegin;
802
803 // Update the instruction stream.
804 BB->splice(InsertPos, BB, MI);
805
806 // Update LiveIntervals
807 if (LIS)
808 LIS->handleMove(*MI, /*UpdateFlags=*/true);
809
810 // Recede RegionBegin if an instruction moves above the first.
811 if (RegionBegin == InsertPos)
812 RegionBegin = MI;
813}
814
816#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
817 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
819 return false;
820 }
821 ++NumInstrsScheduled;
822#endif
823 return true;
824}
825
826/// Per-region scheduling driver, called back from
827/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
828/// that does not consider liveness or register pressure. It is useful for
829/// PostRA scheduling and potentially other custom schedulers.
831 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
832 LLVM_DEBUG(SchedImpl->dumpPolicy());
833
834 // Build the DAG.
836
838
839 SmallVector<SUnit*, 8> TopRoots, BotRoots;
840 findRootsAndBiasEdges(TopRoots, BotRoots);
841
842 LLVM_DEBUG(dump());
843 if (PrintDAGs) dump();
845
846 // Initialize the strategy before modifying the DAG.
847 // This may initialize a DFSResult to be used for queue priority.
848 SchedImpl->initialize(this);
849
850 // Initialize ready queues now that the DAG and priority data are finalized.
851 initQueues(TopRoots, BotRoots);
852
853 bool IsTopNode = false;
854 while (true) {
855 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
856 SUnit *SU = SchedImpl->pickNode(IsTopNode);
857 if (!SU) break;
858
859 assert(!SU->isScheduled && "Node already scheduled");
860 if (!checkSchedLimit())
861 break;
862
863 MachineInstr *MI = SU->getInstr();
864 if (IsTopNode) {
865 assert(SU->isTopReady() && "node still has unscheduled dependencies");
866 if (&*CurrentTop == MI)
868 else
870 } else {
871 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
874 if (&*priorII == MI)
875 CurrentBottom = priorII;
876 else {
877 if (&*CurrentTop == MI)
878 CurrentTop = nextIfDebug(++CurrentTop, priorII);
881 }
882 }
883 // Notify the scheduling strategy before updating the DAG.
884 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
885 // runs, it can then use the accurate ReadyCycle time to determine whether
886 // newly released nodes can move to the readyQ.
887 SchedImpl->schedNode(SU, IsTopNode);
888
889 updateQueues(SU, IsTopNode);
890 }
891 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
892
894
895 LLVM_DEBUG({
896 dbgs() << "*** Final schedule for "
897 << printMBBReference(*begin()->getParent()) << " ***\n";
898 dumpSchedule();
899 dbgs() << '\n';
900 });
901}
902
903/// Apply each ScheduleDAGMutation step in order.
905 for (auto &m : Mutations)
906 m->apply(this);
907}
908
911 SmallVectorImpl<SUnit*> &BotRoots) {
912 for (SUnit &SU : SUnits) {
913 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
914
915 // Order predecessors so DFSResult follows the critical path.
916 SU.biasCriticalPath();
917
918 // A SUnit is ready to top schedule if it has no predecessors.
919 if (!SU.NumPredsLeft)
920 TopRoots.push_back(&SU);
921 // A SUnit is ready to bottom schedule if it has no successors.
922 if (!SU.NumSuccsLeft)
923 BotRoots.push_back(&SU);
924 }
926}
927
928/// Identify DAG roots and setup scheduler queues.
930 ArrayRef<SUnit*> BotRoots) {
931 NextClusterSucc = nullptr;
932 NextClusterPred = nullptr;
933
934 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
935 //
936 // Nodes with unreleased weak edges can still be roots.
937 // Release top roots in forward order.
938 for (SUnit *SU : TopRoots)
939 SchedImpl->releaseTopNode(SU);
940
941 // Release bottom roots in reverse order so the higher priority nodes appear
942 // first. This is more natural and slightly more efficient.
944 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
945 SchedImpl->releaseBottomNode(*I);
946 }
947
950
951 SchedImpl->registerRoots();
952
953 // Advance past initial DebugValues.
956}
957
958/// Update scheduler queues after scheduling an instruction.
959void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
960 // Release dependent instructions for scheduling.
961 if (IsTopNode)
963 else
965
966 SU->isScheduled = true;
967}
968
969/// Reinsert any remaining debug_values, just like the PostRA scheduler.
971 // If first instruction was a DBG_VALUE then put it back.
972 if (FirstDbgValue) {
975 }
976
977 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
978 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
979 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
980 MachineInstr *DbgValue = P.first;
981 MachineBasicBlock::iterator OrigPrevMI = P.second;
982 if (&*RegionBegin == DbgValue)
983 ++RegionBegin;
984 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
985 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
987 }
988}
989
990#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
991static const char *scheduleTableLegend = " i: issue\n x: resource booked";
992
 994 // Bail out when there is no schedule model to query.
996 return;
997
 998 // Nothing to show if there are fewer than two instructions.
999 if (BB->size() < 2)
1000 return;
1001
1002 dbgs() << " * Schedule table (TopDown):\n";
1003 dbgs() << scheduleTableLegend << "\n";
1004 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1005 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1006 for (MachineInstr &MI : *this) {
1007 SUnit *SU = getSUnit(&MI);
1008 if (!SU)
1009 continue;
1010 const MCSchedClassDesc *SC = getSchedClass(SU);
1013 PI != PE; ++PI) {
1014 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1015 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1016 }
1017 }
1018 // Print the header with the cycles
1019 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1020 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1021 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1022 dbgs() << "|\n";
1023
1024 for (MachineInstr &MI : *this) {
1025 SUnit *SU = getSUnit(&MI);
1026 if (!SU) {
1027 dbgs() << "Missing SUnit\n";
1028 continue;
1029 }
1030 std::string NodeName("SU(");
1031 NodeName += std::to_string(SU->NodeNum) + ")";
1032 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1033 unsigned C = FirstCycle;
1034 for (; C <= LastCycle; ++C) {
1035 if (C == SU->TopReadyCycle)
1036 dbgs() << llvm::left_justify("| i", ColWidth);
1037 else
1038 dbgs() << llvm::left_justify("|", ColWidth);
1039 }
1040 dbgs() << "|\n";
1041 const MCSchedClassDesc *SC = getSchedClass(SU);
1042
1046
1048 llvm::stable_sort(ResourcesIt,
1049 [](const MCWriteProcResEntry &LHS,
1050 const MCWriteProcResEntry &RHS) -> bool {
1051 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1052 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1053 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1054 });
1055 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1056 C = FirstCycle;
1057 const std::string ResName =
1058 SchedModel.getResourceName(PI.ProcResourceIdx);
1059 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1060 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1061 dbgs() << llvm::left_justify("|", ColWidth);
1062 }
1063 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1064 ++I, ++C)
1065 dbgs() << llvm::left_justify("| x", ColWidth);
1066 while (C++ <= LastCycle)
1067 dbgs() << llvm::left_justify("|", ColWidth);
1068 // Place end char
1069 dbgs() << "| \n";
1070 }
1071 }
1072}
1073
 1075 // Bail out when there is no schedule model to query.
1077 return;
1078
 1079 // Nothing to show if there are fewer than two instructions.
1080 if (BB->size() < 2)
1081 return;
1082
1083 dbgs() << " * Schedule table (BottomUp):\n";
1084 dbgs() << scheduleTableLegend << "\n";
1085
1086 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1087 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1088 for (MachineInstr &MI : *this) {
1089 SUnit *SU = getSUnit(&MI);
1090 if (!SU)
1091 continue;
1092 const MCSchedClassDesc *SC = getSchedClass(SU);
1095 PI != PE; ++PI) {
1096 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1097 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1098 }
1099 }
1100 // Print the header with the cycles
1101 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1102 for (int C = FirstCycle; C >= LastCycle; --C)
1103 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1104 dbgs() << "|\n";
1105
1106 for (MachineInstr &MI : *this) {
1107 SUnit *SU = getSUnit(&MI);
1108 if (!SU) {
1109 dbgs() << "Missing SUnit\n";
1110 continue;
1111 }
1112 std::string NodeName("SU(");
1113 NodeName += std::to_string(SU->NodeNum) + ")";
1114 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1115 int C = FirstCycle;
1116 for (; C >= LastCycle; --C) {
1117 if (C == (int)SU->BotReadyCycle)
1118 dbgs() << llvm::left_justify("| i", ColWidth);
1119 else
1120 dbgs() << llvm::left_justify("|", ColWidth);
1121 }
1122 dbgs() << "|\n";
1123 const MCSchedClassDesc *SC = getSchedClass(SU);
1127
1129 llvm::stable_sort(ResourcesIt,
1130 [](const MCWriteProcResEntry &LHS,
1131 const MCWriteProcResEntry &RHS) -> bool {
1132 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1133 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1134 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1135 });
1136 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1137 C = FirstCycle;
1138 const std::string ResName =
1139 SchedModel.getResourceName(PI.ProcResourceIdx);
1140 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1141 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1142 dbgs() << llvm::left_justify("|", ColWidth);
1143 }
1144 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1145 ++I, --C)
1146 dbgs() << llvm::left_justify("| x", ColWidth);
1147 while (C-- >= LastCycle)
1148 dbgs() << llvm::left_justify("|", ColWidth);
1149 // Place end char
1150 dbgs() << "| \n";
1151 }
1152 }
1153}
1154#endif
1155
1156#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1161 else if (DumpDir == DumpDirection::BottomUp)
1164 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1165 } else {
1166 dbgs() << "* Schedule table: DumpDirection not set.\n";
1167 }
1168 }
1169
1170 for (MachineInstr &MI : *this) {
1171 if (SUnit *SU = getSUnit(&MI))
1172 dumpNode(*SU);
1173 else
1174 dbgs() << "Missing SUnit\n";
1175 }
1176}
1177#endif
1178
1179//===----------------------------------------------------------------------===//
1180// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1181// preservation.
1182//===----------------------------------------------------------------------===//
1183
1185 delete DFSResult;
1186}
1187
1189 const MachineInstr &MI = *SU.getInstr();
1190 for (const MachineOperand &MO : MI.operands()) {
1191 if (!MO.isReg())
1192 continue;
1193 if (!MO.readsReg())
1194 continue;
1195 if (TrackLaneMasks && !MO.isUse())
1196 continue;
1197
1198 Register Reg = MO.getReg();
1199 if (!Reg.isVirtual())
1200 continue;
1201
1202 // Ignore re-defs.
1203 if (TrackLaneMasks) {
1204 bool FoundDef = false;
1205 for (const MachineOperand &MO2 : MI.all_defs()) {
1206 if (MO2.getReg() == Reg && !MO2.isDead()) {
1207 FoundDef = true;
1208 break;
1209 }
1210 }
1211 if (FoundDef)
1212 continue;
1213 }
1214
1215 // Record this local VReg use.
1217 for (; UI != VRegUses.end(); ++UI) {
1218 if (UI->SU == &SU)
1219 break;
1220 }
1221 if (UI == VRegUses.end())
1223 }
1224}
1225
1226/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1227/// crossing a scheduling boundary. [begin, end) includes all instructions in
1228/// the region, including the boundary itself and single-instruction regions
1229/// that don't get scheduled.
1233 unsigned regioninstrs)
1234{
1235 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1236 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1237
1238 // For convenience remember the end of the liveness region.
1239 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1240
1242
1243 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1244 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1245
1247 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1248}
1249
1250// Setup the register pressure trackers for the top scheduled and bottom
1251// scheduled regions.
1253 VRegUses.clear();
1255 for (SUnit &SU : SUnits)
1256 collectVRegUses(SU);
1257
1259 ShouldTrackLaneMasks, false);
1261 ShouldTrackLaneMasks, false);
1262
1263 // Close the RPTracker to finalize live ins.
1265
1267
1268 // Initialize the live ins and live outs.
1271
1272 // Close one end of the tracker so we can call
1273 // getMaxUpward/DownwardPressureDelta before advancing across any
1274 // instructions. This converts currently live regs into live ins/outs.
1277
1279 if (!BotRPTracker.getLiveThru().empty()) {
1281 LLVM_DEBUG(dbgs() << "Live Thru: ";
1283 };
1284
1285 // For each live out vreg reduce the pressure change associated with other
1286 // uses of the same vreg below the live-out reaching def.
1288
1289 // Account for liveness generated by the region boundary.
1290 if (LiveRegionEnd != RegionEnd) {
1292 BotRPTracker.recede(&LiveUses);
1293 updatePressureDiffs(LiveUses);
1294 }
1295
1296 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1298 dbgs() << "Bottom Pressure:\n";
1300
1302 (RegionEnd->isDebugInstr() &&
1304 "Can't find the region bottom");
1305
1306 // Cache the list of excess pressure sets in this region. This will also track
1307 // the max pressure in the scheduled code for these sets.
1308 RegionCriticalPSets.clear();
1309 const std::vector<unsigned> &RegionPressure =
1311 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1312 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1313 if (RegionPressure[i] > Limit) {
1314 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1315 << " Actual " << RegionPressure[i] << "\n");
1316 RegionCriticalPSets.push_back(PressureChange(i));
1317 }
1318 }
1319 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1320 for (const PressureChange &RCPS
1322 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1323 dbgs() << "\n");
1324}
1325
1328 const std::vector<unsigned> &NewMaxPressure) {
1329 const PressureDiff &PDiff = getPressureDiff(SU);
1330 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1331 for (const PressureChange &PC : PDiff) {
1332 if (!PC.isValid())
1333 break;
1334 unsigned ID = PC.getPSet();
1335 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1336 ++CritIdx;
1337 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1338 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1339 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1340 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1341 }
1342 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1343 if (NewMaxPressure[ID] >= Limit - 2) {
1344 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1345 << NewMaxPressure[ID]
1346 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1347 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1348 << " livethru)\n");
1349 }
1350 }
1351}
1352
1353/// Update the PressureDiff array for liveness after scheduling this
1354/// instruction.
1356 ArrayRef<RegisterMaskPair> LiveUses) {
1357 for (const RegisterMaskPair &P : LiveUses) {
1358 Register Reg = P.RegUnit;
1359 /// FIXME: Currently assuming single-use physregs.
1360 if (!Reg.isVirtual())
1361 continue;
1362
1364 // If the register has just become live then other uses won't change
1365 // this fact anymore => decrement pressure.
1366 // If the register has just become dead then other uses make it come
1367 // back to life => increment pressure.
1368 bool Decrement = P.LaneMask.any();
1369
1370 for (const VReg2SUnit &V2SU
1371 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1372 SUnit &SU = *V2SU.SU;
1373 if (SU.isScheduled || &SU == &ExitSU)
1374 continue;
1375
1376 PressureDiff &PDiff = getPressureDiff(&SU);
1377 PDiff.addPressureChange(Reg, Decrement, &MRI);
1378 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1379 << printReg(Reg, TRI) << ':'
1380 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1381 dbgs() << " to "; PDiff.dump(*TRI););
1382 }
1383 } else {
1384 assert(P.LaneMask.any());
1385 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1386 // This may be called before CurrentBottom has been initialized. However,
1387 // BotRPTracker must have a valid position. We want the value live into the
1388 // instruction or live out of the block, so ask for the previous
1389 // instruction's live-out.
1390 const LiveInterval &LI = LIS->getInterval(Reg);
1391 VNInfo *VNI;
1394 if (I == BB->end())
1395 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1396 else {
1398 VNI = LRQ.valueIn();
1399 }
1400 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1401 assert(VNI && "No live value at use.");
1402 for (const VReg2SUnit &V2SU
1403 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1404 SUnit *SU = V2SU.SU;
1405 // If this use comes before the reaching def, it cannot be a last use,
1406 // so decrease its pressure change.
1407 if (!SU->isScheduled && SU != &ExitSU) {
1408 LiveQueryResult LRQ =
1410 if (LRQ.valueIn() == VNI) {
1411 PressureDiff &PDiff = getPressureDiff(SU);
1412 PDiff.addPressureChange(Reg, true, &MRI);
1413 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1414 << *SU->getInstr();
1415 dbgs() << " to "; PDiff.dump(*TRI););
1416 }
1417 }
1418 }
1419 }
1420 }
1421}
1422
1424#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1425 if (EntrySU.getInstr() != nullptr)
1427 for (const SUnit &SU : SUnits) {
1428 dumpNodeAll(SU);
1429 if (ShouldTrackPressure) {
1430 dbgs() << " Pressure Diff : ";
1431 getPressureDiff(&SU).dump(*TRI);
1432 }
1433 dbgs() << " Single Issue : ";
1434 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1435 SchedModel.mustEndGroup(SU.getInstr()))
1436 dbgs() << "true;";
1437 else
1438 dbgs() << "false;";
1439 dbgs() << '\n';
1440 }
1441 if (ExitSU.getInstr() != nullptr)
1443#endif
1444}
1445
1446/// schedule - Called back from MachineScheduler::runOnMachineFunction
1447/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1448/// only includes instructions that have DAG nodes, not scheduling boundaries.
1449///
1450/// This is a skeletal driver, with all the functionality pushed into helpers,
1451/// so that it can be easily extended by experimental schedulers. Generally,
1452/// implementing MachineSchedStrategy should be sufficient to implement a new
1453/// scheduling algorithm. However, if a scheduler further subclasses
1454/// ScheduleDAGMILive then it will want to override this virtual method in order
1455/// to update any specialized state.
1457 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1458 LLVM_DEBUG(SchedImpl->dumpPolicy());
1460
1462
1463 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1464 findRootsAndBiasEdges(TopRoots, BotRoots);
1465
1466 // Initialize the strategy before modifying the DAG.
1467 // This may initialize a DFSResult to be used for queue priority.
1468 SchedImpl->initialize(this);
1469
1470 LLVM_DEBUG(dump());
1471 if (PrintDAGs) dump();
1473
1474 // Initialize ready queues now that the DAG and priority data are finalized.
1475 initQueues(TopRoots, BotRoots);
1476
1477 bool IsTopNode = false;
1478 while (true) {
1479 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1480 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1481 if (!SU) break;
1482
1483 assert(!SU->isScheduled && "Node already scheduled");
1484 if (!checkSchedLimit())
1485 break;
1486
1487 scheduleMI(SU, IsTopNode);
1488
1489 if (DFSResult) {
1490 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1491 if (!ScheduledTrees.test(SubtreeID)) {
1492 ScheduledTrees.set(SubtreeID);
1493 DFSResult->scheduleTree(SubtreeID);
1494 SchedImpl->scheduleTree(SubtreeID);
1495 }
1496 }
1497
1498 // Notify the scheduling strategy after updating the DAG.
1499 SchedImpl->schedNode(SU, IsTopNode);
1500
1501 updateQueues(SU, IsTopNode);
1502 }
1503 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1504
1506
1507 LLVM_DEBUG({
1508 dbgs() << "*** Final schedule for "
1509 << printMBBReference(*begin()->getParent()) << " ***\n";
1510 dumpSchedule();
1511 dbgs() << '\n';
1512 });
1513}
1514
1515/// Build the DAG and setup three register pressure trackers.
1517 if (!ShouldTrackPressure) {
1518 RPTracker.reset();
1519 RegionCriticalPSets.clear();
1521 return;
1522 }
1523
1524 // Initialize the register pressure tracker used by buildSchedGraph.
1526 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1527
 1528 // Account for liveness generated by the region boundary.
1529 if (LiveRegionEnd != RegionEnd)
1530 RPTracker.recede();
1531
1532 // Build the DAG, and compute current register pressure.
1534
1535 // Initialize top/bottom trackers after computing region pressure.
1537}
1538
1540 if (!DFSResult)
1541 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1542 DFSResult->clear();
1544 DFSResult->resize(SUnits.size());
1547}
1548
1549/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1550/// only provides the critical path for single block loops. To handle loops that
1551/// span blocks, we could use the vreg path latencies provided by
1552/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1553/// available for use in the scheduler.
1554///
1555/// The cyclic path estimation identifies a def-use pair that crosses the back
1556/// edge and considers the depth and height of the nodes. For example, consider
1557/// the following instruction sequence where each instruction has unit latency
1558/// and defines an eponymous virtual register:
1559///
1560/// a->b(a,c)->c(b)->d(c)->exit
1561///
1562/// The cyclic critical path is two cycles: b->c->b
1563/// The acyclic critical path is four cycles: a->b->c->d->exit
1564/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1565/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1566/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1567/// LiveInDepth = depth(b) = len(a->b) = 1
1568///
1569/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1570/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1571/// CyclicCriticalPath = min(2, 2) = 2
1572///
1573/// This could be relevant to PostRA scheduling, but is currently implemented
1574/// assuming LiveIntervals.
 1576 // This only applies to single-block loops.
1577 if (!BB->isSuccessor(BB))
1578 return 0;
1579
1580 unsigned MaxCyclicLatency = 0;
1581 // Visit each live out vreg def to find def/use pairs that cross iterations.
1583 Register Reg = P.RegUnit;
1584 if (!Reg.isVirtual())
1585 continue;
1586 const LiveInterval &LI = LIS->getInterval(Reg);
1587 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1588 if (!DefVNI)
1589 continue;
1590
1592 const SUnit *DefSU = getSUnit(DefMI);
1593 if (!DefSU)
1594 continue;
1595
1596 unsigned LiveOutHeight = DefSU->getHeight();
1597 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1598 // Visit all local users of the vreg def.
1599 for (const VReg2SUnit &V2SU
1600 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1601 SUnit *SU = V2SU.SU;
1602 if (SU == &ExitSU)
1603 continue;
1604
1605 // Only consider uses of the phi.
1607 if (!LRQ.valueIn()->isPHIDef())
1608 continue;
1609
1610 // Assume that a path spanning two iterations is a cycle, which could
1611 // overestimate in strange cases. This allows cyclic latency to be
1612 // estimated as the minimum slack of the vreg's depth or height.
1613 unsigned CyclicLatency = 0;
1614 if (LiveOutDepth > SU->getDepth())
1615 CyclicLatency = LiveOutDepth - SU->getDepth();
1616
1617 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1618 if (LiveInHeight > LiveOutHeight) {
1619 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1620 CyclicLatency = LiveInHeight - LiveOutHeight;
1621 } else
1622 CyclicLatency = 0;
1623
1624 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1625 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1626 if (CyclicLatency > MaxCyclicLatency)
1627 MaxCyclicLatency = CyclicLatency;
1628 }
1629 }
1630 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1631 return MaxCyclicLatency;
1632}
1633
1634/// Release ExitSU predecessors and setup scheduler queues. Re-position
1635/// the Top RP tracker in case the region beginning has changed.
1637 ArrayRef<SUnit*> BotRoots) {
1638 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1639 if (ShouldTrackPressure) {
1640 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1642 }
1643}
1644
1645/// Move an instruction and update register pressure.
1646void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1647 // Move the instruction to its new location in the instruction stream.
1648 MachineInstr *MI = SU->getInstr();
1649
1650 if (IsTopNode) {
1651 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1652 if (&*CurrentTop == MI)
1654 else {
1657 }
1658
1659 if (ShouldTrackPressure) {
1660 // Update top scheduled pressure.
1661 RegisterOperands RegOpers;
1662 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1663 /*IgnoreDead=*/false);
1665 // Adjust liveness and add missing dead+read-undef flags.
1666 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1667 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1668 } else {
1669 // Adjust for missing dead-def flags.
1670 RegOpers.detectDeadDefs(*MI, *LIS);
1671 }
1672
1673 TopRPTracker.advance(RegOpers);
1674 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1675 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1677
1679 }
1680 } else {
1681 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1684 if (&*priorII == MI)
1685 CurrentBottom = priorII;
1686 else {
1687 if (&*CurrentTop == MI) {
1688 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1690 }
1692 CurrentBottom = MI;
1694 }
1695 if (ShouldTrackPressure) {
1696 RegisterOperands RegOpers;
1697 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1698 /*IgnoreDead=*/false);
1700 // Adjust liveness and add missing dead+read-undef flags.
1701 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1702 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1703 } else {
1704 // Adjust for missing dead-def flags.
1705 RegOpers.detectDeadDefs(*MI, *LIS);
1706 }
1707
1711 BotRPTracker.recede(RegOpers, &LiveUses);
1712 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1713 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1715
1717 updatePressureDiffs(LiveUses);
1718 }
1719 }
1720}
1721
1722//===----------------------------------------------------------------------===//
1723// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1724//===----------------------------------------------------------------------===//
1725
1726namespace {
1727
1728/// Post-process the DAG to create cluster edges between neighboring
1729/// loads or between neighboring stores.
1730class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1731 struct MemOpInfo {
1732 SUnit *SU;
1734 int64_t Offset;
1735 LocationSize Width;
1736 bool OffsetIsScalable;
1737
1738 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1739 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1740 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1741 OffsetIsScalable(OffsetIsScalable) {}
1742
1743 static bool Compare(const MachineOperand *const &A,
1744 const MachineOperand *const &B) {
1745 if (A->getType() != B->getType())
1746 return A->getType() < B->getType();
1747 if (A->isReg())
1748 return A->getReg() < B->getReg();
1749 if (A->isFI()) {
1750 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1752 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1754 return StackGrowsDown ? A->getIndex() > B->getIndex()
1755 : A->getIndex() < B->getIndex();
1756 }
1757
1758 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1759 "index bases.");
1760 }
1761
1762 bool operator<(const MemOpInfo &RHS) const {
1763 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1764 // comparison instead when it's available.
1765 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1766 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1767 Compare))
1768 return true;
1769 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1770 BaseOps.begin(), BaseOps.end(), Compare))
1771 return false;
1772 if (Offset != RHS.Offset)
1773 return Offset < RHS.Offset;
1774 return SU->NodeNum < RHS.SU->NodeNum;
1775 }
1776 };
1777
1778 const TargetInstrInfo *TII;
1779 const TargetRegisterInfo *TRI;
1780 bool IsLoad;
1781 bool ReorderWhileClustering;
1782
1783public:
1784 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1785 const TargetRegisterInfo *tri, bool IsLoad,
1786 bool ReorderWhileClustering)
1787 : TII(tii), TRI(tri), IsLoad(IsLoad),
1788 ReorderWhileClustering(ReorderWhileClustering) {}
1789
1790 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1791
1792protected:
1793 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1794 ScheduleDAGInstrs *DAG);
1795 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1796 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1797 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1799};
1800
1801class StoreClusterMutation : public BaseMemOpClusterMutation {
1802public:
1803 StoreClusterMutation(const TargetInstrInfo *tii,
1804 const TargetRegisterInfo *tri,
1805 bool ReorderWhileClustering)
1806 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1807};
1808
1809class LoadClusterMutation : public BaseMemOpClusterMutation {
1810public:
1811 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1812 bool ReorderWhileClustering)
1813 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1814};
1815
1816} // end anonymous namespace
1817
1818namespace llvm {
1819
1820std::unique_ptr<ScheduleDAGMutation>
1822 const TargetRegisterInfo *TRI,
1823 bool ReorderWhileClustering) {
1824 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1825 TII, TRI, ReorderWhileClustering)
1826 : nullptr;
1827}
1828
1829std::unique_ptr<ScheduleDAGMutation>
1831 const TargetRegisterInfo *TRI,
1832 bool ReorderWhileClustering) {
1833 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1834 TII, TRI, ReorderWhileClustering)
1835 : nullptr;
1836}
1837
1838} // end namespace llvm
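// Illustrative sketch (not part of this file): targets opt in to memop
// clustering by attaching these mutations when they build their DAG, e.g. in a
// target's createMachineScheduler hook:
//
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//   return DAG;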
1839
1840// Sort all the loads/stores first, then for each load/store, check the
1841// following loads/stores one by one until we reach the first non-dependent
1842// one, and call the target hook to see if they can be clustered.
1843// If FastCluster is enabled, we assume that all the loads/stores have been
1844// preprocessed and have no dependencies on each other.
1845void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1846 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1847 ScheduleDAGInstrs *DAG) {
1848 // Keep track of the current cluster length and bytes for each SUnit.
1850
 1851 // At this point, the `MemOpRecords` array must hold at least two mem ops.
 1852 // Try to cluster the mem ops collected within the `MemOpRecords` array.
1853 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1854 // Decision to cluster mem ops is taken based on target dependent logic
1855 auto MemOpa = MemOpRecords[Idx];
1856
1857 // Seek for the next load/store to do the cluster.
1858 unsigned NextIdx = Idx + 1;
1859 for (; NextIdx < End; ++NextIdx)
1860 // Skip if MemOpb has been clustered already or has dependency with
1861 // MemOpa.
1862 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1863 (FastCluster ||
1864 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1865 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1866 break;
1867 if (NextIdx == End)
1868 continue;
1869
1870 auto MemOpb = MemOpRecords[NextIdx];
1871 unsigned ClusterLength = 2;
1872 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1873 MemOpb.Width.getValue().getKnownMinValue();
1874 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1875 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1876 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1877 MemOpb.Width.getValue().getKnownMinValue();
1878 }
1879
1880 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1881 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1882 MemOpb.Offset, MemOpb.OffsetIsScalable,
1883 ClusterLength, CurrentClusterBytes))
1884 continue;
1885
1886 SUnit *SUa = MemOpa.SU;
1887 SUnit *SUb = MemOpb.SU;
1888 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1889 std::swap(SUa, SUb);
1890
1891 // FIXME: Is this check really required?
1892 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1893 continue;
1894
1895 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1896 << SUb->NodeNum << ")\n");
1897 ++NumClustered;
1898
1899 if (IsLoad) {
1900 // Copy successor edges from SUa to SUb. Interleaving computation
1901 // dependent on SUa can prevent load combining due to register reuse.
1902 // Predecessor edges do not need to be copied from SUb to SUa since
1903 // nearby loads should have effectively the same inputs.
1904 for (const SDep &Succ : SUa->Succs) {
1905 if (Succ.getSUnit() == SUb)
1906 continue;
1907 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1908 << ")\n");
1909 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1910 }
1911 } else {
1912 // Copy predecessor edges from SUb to SUa to avoid the SUnits that
1913 // SUb depends on being scheduled in between SUb and SUa. Successor
1914 // edges do not need to be copied from SUa to SUb since nothing will
1915 // depend on stores.
1916 // Note that we need not worry about memory dependencies: two mem ops
1917 // are never clustered if there is any memory dependency between them.
1918 for (const SDep &Pred : SUb->Preds) {
1919 if (Pred.getSUnit() == SUa)
1920 continue;
1921 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1922 << ")\n");
1923 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1924 }
1925 }
1926
1927 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1928 CurrentClusterBytes};
1929
1930 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1931 << ", Curr cluster bytes: " << CurrentClusterBytes
1932 << "\n");
1933 }
1934}
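// Editorial, self-contained sketch of the greedy pairing loop above (not part
// of MachineScheduler.cpp). The MemRec struct, the same-base rule and the
// 16-byte budget are invented stand-ins for MemOpInfo and the
// TII->shouldClusterMemOps() target hook; only the shape of the loop and the
// running {length, bytes} bookkeeping mirror the real code.
#include <algorithm>
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

namespace misched_example {
struct MemRec {
  unsigned Node; // stands in for SUnit::NodeNum
  int Base;      // stands in for the base-operand identity
  std::int64_t Offset;
  unsigned Bytes;
};

// Sort by (base, offset), then try to cluster each record with its neighbor,
// growing ClusterLength/ClusterBytes exactly like SUnit2ClusterInfo above.
inline std::map<unsigned, std::pair<unsigned, unsigned>>
greedyCluster(std::vector<MemRec> Recs) {
  std::sort(Recs.begin(), Recs.end(), [](const MemRec &A, const MemRec &B) {
    return std::tie(A.Base, A.Offset) < std::tie(B.Base, B.Offset);
  });
  std::map<unsigned, std::pair<unsigned, unsigned>> ClusterInfo;
  for (std::size_t I = 0; I + 1 < Recs.size(); ++I) {
    const MemRec &A = Recs[I], &B = Recs[I + 1];
    unsigned Len = 2, Bytes = A.Bytes + B.Bytes;
    if (auto It = ClusterInfo.find(A.Node); It != ClusterInfo.end()) {
      Len = It->second.first + 1;
      Bytes = It->second.second + B.Bytes;
    }
    // Invented stand-in for the target hook: same base and <= 16 bytes total.
    if (A.Base == B.Base && Bytes <= 16)
      ClusterInfo[B.Node] = {Len, Bytes};
  }
  return ClusterInfo;
}
} // namespace misched_example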
1935
1936void BaseMemOpClusterMutation::collectMemOpRecords(
1937 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1938 for (auto &SU : SUnits) {
1939 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1940 (!IsLoad && !SU.getInstr()->mayStore()))
1941 continue;
1942
1943 const MachineInstr &MI = *SU.getInstr();
1944 SmallVector<const MachineOperand *, 4> BaseOps;
1945 int64_t Offset;
1946 bool OffsetIsScalable;
1947 LocationSize Width = 0;
1948 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1949 OffsetIsScalable, Width, TRI)) {
1950 if (!Width.hasValue())
1951 continue;
1952
1953 MemOpRecords.push_back(
1954 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1955
1956 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1957 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1958 << ", Width: " << Width << "\n");
1959 }
1960#ifndef NDEBUG
1961 for (const auto *Op : BaseOps)
1962 assert(Op);
1963#endif
1964 }
1965}
1966
1967 bool BaseMemOpClusterMutation::groupMemOps(
1968 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1969 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1970 bool FastCluster =
1971 ForceFastCluster ||
1972 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1973
1974 for (const auto &MemOp : MemOps) {
1975 unsigned ChainPredID = DAG->SUnits.size();
1976 if (FastCluster) {
1977 for (const SDep &Pred : MemOp.SU->Preds) {
1978 // We only want to cluster mem ops that have the same ctrl (non-data)
1979 // pred so that they have no ctrl dependency on each other. But for
1980 // store instrs, we can still cluster them if the pred is a load instr.
1981 if ((Pred.isCtrl() &&
1982 (IsLoad ||
1983 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1984 !Pred.isArtificial()) {
1985 ChainPredID = Pred.getSUnit()->NodeNum;
1986 break;
1987 }
1988 }
1989 } else
1990 ChainPredID = 0;
1991
1992 Groups[ChainPredID].push_back(MemOp);
1993 }
1994 return FastCluster;
1995}
1996
1997/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1998void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1999 // Collect all the clusterable loads/stores
2000 SmallVector<MemOpInfo, 32> MemOpRecords;
2001 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2002
2003 if (MemOpRecords.size() < 2)
2004 return;
2005
2006 // If the DAG is too complex, use a heuristic to put loads/stores without
2007 // dependencies into the same group, to avoid a compile-time blow-up.
2008 // Note that some fusion pairs may be lost this way.
2009 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2010 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2011
2012 for (auto &Group : Groups) {
2013 // Sort the loads/stores so that we can stop clustering as early as
2014 // possible.
2015 llvm::sort(Group.second);
2016
2017 // Try to cluster all the neighboring loads/stores.
2018 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2019 }
2020}
2021
2022//===----------------------------------------------------------------------===//
2023// CopyConstrain - DAG post-processing to encourage copy elimination.
2024//===----------------------------------------------------------------------===//
2025
2026namespace {
2027
2028/// Post-process the DAG to create weak edges from all uses of a copy to
2029/// the one use that defines the copy's source vreg, most likely an induction
2030/// variable increment.
2031class CopyConstrain : public ScheduleDAGMutation {
2032 // Transient state.
2033 SlotIndex RegionBeginIdx;
2034
2035 // RegionEndIdx is the slot index of the last non-debug instruction in the
2036 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2037 SlotIndex RegionEndIdx;
2038
2039public:
2040 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2041
2042 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2043
2044protected:
2045 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2046};
2047
2048} // end anonymous namespace
2049
2050namespace llvm {
2051
2052std::unique_ptr<ScheduleDAGMutation>
2054 const TargetRegisterInfo *TRI) {
2055 return std::make_unique<CopyConstrain>(TII, TRI);
2056}
2057
2058} // end namespace llvm
2059
2060/// constrainLocalCopy handles two possibilities:
2061/// 1) Local src:
2062/// I0: = dst
2063/// I1: src = ...
2064/// I2: = dst
2065/// I3: dst = src (copy)
2066/// (create pred->succ edges I0->I1, I2->I1)
2067///
2068/// 2) Local copy:
2069/// I0: dst = src (copy)
2070/// I1: = dst
2071/// I2: src = ...
2072/// I3: = dst
2073/// (create pred->succ edges I1->I2, I3->I2)
2074///
2075/// Although the MachineScheduler is currently constrained to single blocks,
2076/// this algorithm should handle extended blocks. An EBB is a set of
2077/// contiguously numbered blocks such that the previous block in the EBB is
2078/// always the single predecessor.
2079void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2080 LiveIntervals *LIS = DAG->getLIS();
2081 MachineInstr *Copy = CopySU->getInstr();
2082
2083 // Check for pure vreg copies.
2084 const MachineOperand &SrcOp = Copy->getOperand(1);
2085 Register SrcReg = SrcOp.getReg();
2086 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2087 return;
2088
2089 const MachineOperand &DstOp = Copy->getOperand(0);
2090 Register DstReg = DstOp.getReg();
2091 if (!DstReg.isVirtual() || DstOp.isDead())
2092 return;
2093
2094 // Check if either the dest or source is local. If it's live across a back
2095 // edge, it's not local. Note that if both vregs are live across the back
2096 // edge, we cannot successfully constrain the copy without cyclic scheduling.
2097 // If both the copy's source and dest are local live intervals, then we
2098 // should treat the dest as the global for the purpose of adding
2099 // constraints. This adds edges from source's other uses to the copy.
2100 unsigned LocalReg = SrcReg;
2101 unsigned GlobalReg = DstReg;
2102 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2103 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2104 LocalReg = DstReg;
2105 GlobalReg = SrcReg;
2106 LocalLI = &LIS->getInterval(LocalReg);
2107 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2108 return;
2109 }
2110 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2111
2112 // Find the global segment after the start of the local LI.
2113 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2114 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2115 // local live range. We could create edges from other global uses to the local
2116 // start, but the coalescer should have already eliminated these cases, so
2117 // don't bother dealing with it.
2118 if (GlobalSegment == GlobalLI->end())
2119 return;
2120
2121 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2122 // returned the next global segment. But if GlobalSegment overlaps with
2123 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2124 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2125 if (GlobalSegment->contains(LocalLI->beginIndex()))
2126 ++GlobalSegment;
2127
2128 if (GlobalSegment == GlobalLI->end())
2129 return;
2130
2131 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2132 if (GlobalSegment != GlobalLI->begin()) {
2133 // Two address defs have no hole.
2134 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2135 GlobalSegment->start)) {
2136 return;
2137 }
2138 // If the prior global segment may be defined by the same two-address
2139 // instruction that also defines LocalLI, then we can't make a hole here.
2140 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2141 LocalLI->beginIndex())) {
2142 return;
2143 }
2144 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2145 // it would be a disconnected component in the live range.
2146 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2147 "Disconnected LRG within the scheduling region.");
2148 }
2149 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2150 if (!GlobalDef)
2151 return;
2152
2153 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2154 if (!GlobalSU)
2155 return;
2156
2157 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2158 // constraining the uses of the last local def to precede GlobalDef.
2159 SmallVector<SUnit*,8> LocalUses;
2160 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2161 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2162 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2163 for (const SDep &Succ : LastLocalSU->Succs) {
2164 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2165 continue;
2166 if (Succ.getSUnit() == GlobalSU)
2167 continue;
2168 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2169 return;
2170 LocalUses.push_back(Succ.getSUnit());
2171 }
2172 // Open the top of the GlobalLI hole by constraining any earlier global uses
2173 // to precede the start of LocalLI.
2174 SmallVector<SUnit*,8> GlobalUses;
2175 MachineInstr *FirstLocalDef =
2176 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2177 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2178 for (const SDep &Pred : GlobalSU->Preds) {
2179 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2180 continue;
2181 if (Pred.getSUnit() == FirstLocalSU)
2182 continue;
2183 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2184 return;
2185 GlobalUses.push_back(Pred.getSUnit());
2186 }
2187 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2188 // Add the weak edges.
2189 for (SUnit *LU : LocalUses) {
2190 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2191 << GlobalSU->NodeNum << ")\n");
2192 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2193 }
2194 for (SUnit *GU : GlobalUses) {
2195 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2196 << FirstLocalSU->NodeNum << ")\n");
2197 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2198 }
2199}
2200
2201/// Callback from DAG postProcessing to create weak edges to encourage
2202/// copy elimination.
2203void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2204 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2205 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2206
2207 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2208 if (FirstPos == DAG->end())
2209 return;
2210 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2211 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2212 *priorNonDebug(DAG->end(), DAG->begin()));
2213
2214 for (SUnit &SU : DAG->SUnits) {
2215 if (!SU.getInstr()->isCopy())
2216 continue;
2217
2218 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2219 }
2220}
2221
2222//===----------------------------------------------------------------------===//
2223// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2224// and possibly other custom schedulers.
2225//===----------------------------------------------------------------------===//
2226
2227static const unsigned InvalidCycle = ~0U;
2228
2230
2231/// Given a Count of resource usage and a Latency value, return true if a
2232/// SchedBoundary becomes resource limited.
2233/// If we are checking after scheduling a node, we should return true when
2234/// we just reach the resource limit.
2235static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2236 unsigned Latency, bool AfterSchedNode) {
2237 int ResCntFactor = (int)(Count - (Latency * LFactor));
2238 if (AfterSchedNode)
2239 return ResCntFactor >= (int)LFactor;
2240 else
2241 return ResCntFactor > (int)LFactor;
2242}
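// Editorial worked example for checkResourceLimit() above (not part of this
// file). With a latency factor of 2, a scaled resource count of 10 and a
// latency of 4 cycles, ResCntFactor = 10 - 4*2 = 2 == LFactor: the zone counts
// as resource limited right after scheduling a node (>=), but not when merely
// probing before scheduling (>). Names below are invented for the example.
#include <cassert>

namespace misched_example {
inline bool checkResourceLimitExample(unsigned LFactor, unsigned Count,
                                      unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
  return AfterSchedNode ? ResCntFactor >= (int)LFactor
                        : ResCntFactor > (int)LFactor;
}

inline void checkResourceLimitDemo() {
  assert(checkResourceLimitExample(2, 10, 4, /*AfterSchedNode=*/true));
  assert(!checkResourceLimitExample(2, 10, 4, /*AfterSchedNode=*/false));
}
} // namespace misched_example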
2243
2244void SchedBoundary::reset() {
2245 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2246 // Destroying and reconstructing it is very expensive though. So keep
2247 // invalid, placeholder HazardRecs.
2248 if (HazardRec && HazardRec->isEnabled()) {
2249 delete HazardRec;
2250 HazardRec = nullptr;
2251 }
2252 Available.clear();
2253 Pending.clear();
2254 CheckPending = false;
2255 CurrCycle = 0;
2256 CurrMOps = 0;
2257 MinReadyCycle = std::numeric_limits<unsigned>::max();
2258 ExpectedLatency = 0;
2259 DependentLatency = 0;
2260 RetiredMOps = 0;
2261 MaxExecutedResCount = 0;
2262 ZoneCritResIdx = 0;
2263 IsResourceLimited = false;
2264 ReservedCycles.clear();
2265 ReservedResourceSegments.clear();
2266 ReservedCyclesIndex.clear();
2267 ResourceGroupSubUnitMasks.clear();
2268#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2269 // Track the maximum number of stall cycles that could arise either from the
2270 // latency of a DAG edge or the number of cycles that a processor resource is
2271 // reserved (SchedBoundary::ReservedCycles).
2272 MaxObservedStall = 0;
2273#endif
2274 // Reserve a zero-count for invalid CritResIdx.
2275 ExecutedResCounts.resize(1);
2276 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2277}
2278
2279void SchedRemainder::
2280init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2281 reset();
2282 if (!SchedModel->hasInstrSchedModel())
2283 return;
2284 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2285 for (SUnit &SU : DAG->SUnits) {
2286 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2287 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2288 * SchedModel->getMicroOpFactor();
2289 for (TargetSchedModel::ProcResIter
2290 PI = SchedModel->getWriteProcResBegin(SC),
2291 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2292 unsigned PIdx = PI->ProcResourceIdx;
2293 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2294 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2295 RemainingCounts[PIdx] +=
2296 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2297 }
2298 }
2299}
2300
2301void SchedBoundary::
2302init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2303 reset();
2304 DAG = dag;
2305 SchedModel = smodel;
2306 Rem = rem;
2307 if (SchedModel->hasInstrSchedModel()) {
2308 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2309 ReservedCyclesIndex.resize(ResourceCount);
2310 ExecutedResCounts.resize(ResourceCount);
2311 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2312 unsigned NumUnits = 0;
2313
2314 for (unsigned i = 0; i < ResourceCount; ++i) {
2315 ReservedCyclesIndex[i] = NumUnits;
2316 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2317 if (isUnbufferedGroup(i)) {
2318 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2319 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2320 U != UE; ++U)
2321 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2322 }
2323 }
2324
2325 ReservedCycles.resize(NumUnits, InvalidCycle);
2326 }
2327}
2328
2329/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2330/// these "soft stalls" differently than the hard stall cycles based on CPU
2331/// resources and computed by checkHazard(). A fully in-order model
2332/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2333/// available for scheduling until they are ready. However, a weaker in-order
2334/// model may use this for heuristics. For example, if a processor has in-order
2335/// behavior when reading certain resources, this may come into play.
2336unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2337 if (!SU->isUnbuffered)
2338 return 0;
2339
2340 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2341 if (ReadyCycle > CurrCycle)
2342 return ReadyCycle - CurrCycle;
2343 return 0;
2344}
2345
2346/// Compute the next cycle at which the given processor resource unit
2347/// can be scheduled.
2348unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2349 unsigned ReleaseAtCycle,
2350 unsigned AcquireAtCycle) {
2351 if (SchedModel && SchedModel->enableIntervals()) {
2352 if (isTop())
2353 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2354 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2355
2356 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2357 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2358 }
2359
2360 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2361 // If this resource has never been used, always return cycle zero.
2362 if (NextUnreserved == InvalidCycle)
2363 return CurrCycle;
2364 // For bottom-up scheduling add the cycles needed for the current operation.
2365 if (!isTop())
2366 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2367 return NextUnreserved;
2368}
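// Editorial sketch of the non-interval branch above (not part of this file):
// an unused unit is available at the current cycle, and for bottom-up
// scheduling the operation's ReleaseAtCycle is added on top of the previously
// reserved cycle. InvalidCycleEx and the function name are invented stand-ins.
#include <algorithm>

namespace misched_example {
constexpr unsigned InvalidCycleEx = ~0u;

inline unsigned nextUnreservedExample(unsigned ReservedCycle, unsigned CurrCycle,
                                      unsigned ReleaseAtCycle, bool IsTop) {
  if (ReservedCycle == InvalidCycleEx)
    return CurrCycle;                                            // never used
  if (!IsTop)
    return std::max(CurrCycle, ReservedCycle + ReleaseAtCycle);  // bottom-up
  return ReservedCycle;                                          // top-down
}
} // namespace misched_example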
2369
2370/// Compute the next cycle at which the given processor resource can be
2371/// scheduled. Returns the next cycle and the index of the processor resource
2372/// instance in the reserved cycles vector.
2373std::pair<unsigned, unsigned>
2374SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2375 unsigned ReleaseAtCycle,
2376 unsigned AcquireAtCycle) {
2377 if (MischedDetailResourceBooking) {
2378 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2379 LLVM_DEBUG(dumpReservedCycles());
2380 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2381 }
2382 unsigned MinNextUnreserved = InvalidCycle;
2383 unsigned InstanceIdx = 0;
2384 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2385 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2386 assert(NumberOfInstances > 0 &&
2387 "Cannot have zero instances of a ProcResource");
2388
2389 if (isUnbufferedGroup(PIdx)) {
2390 // If any subunits are used by the instruction, report that the
2391 // subunits of the resource group are available at the first cycle
2392 // in which the unit is available, effectively removing the group
2393 // record from hazarding and basing the hazarding decisions on the
2394 // subunit records. Otherwise, choose the first available instance
2395 // from among the subunits. Specifications which assign cycles to
2396 // both the subunits and the group or which use an unbuffered
2397 // group with buffered subunits will appear to schedule
2398 // strangely. In the first case, the additional cycles for the
2399 // group will be ignored. In the second, the group will be
2400 // ignored entirely.
2401 for (const MCWriteProcResEntry &PE :
2402 make_range(SchedModel->getWriteProcResBegin(SC),
2403 SchedModel->getWriteProcResEnd(SC)))
2404 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2405 return std::make_pair(getNextResourceCycleByInstance(
2406 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2407 StartIndex);
2408
2409 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2410 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2411 unsigned NextUnreserved, NextInstanceIdx;
2412 std::tie(NextUnreserved, NextInstanceIdx) =
2413 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2414 if (MinNextUnreserved > NextUnreserved) {
2415 InstanceIdx = NextInstanceIdx;
2416 MinNextUnreserved = NextUnreserved;
2417 }
2418 }
2419 return std::make_pair(MinNextUnreserved, InstanceIdx);
2420 }
2421
2422 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2423 ++I) {
2424 unsigned NextUnreserved =
2425 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2426 if (MischedDetailResourceBooking)
2427 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2428 << NextUnreserved << "c\n");
2429 if (MinNextUnreserved > NextUnreserved) {
2430 InstanceIdx = I;
2431 MinNextUnreserved = NextUnreserved;
2432 }
2433 }
2434 if (MischedDetailResourceBooking)
2435 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2436 << "[" << InstanceIdx - StartIndex << "]"
2437 << " available @" << MinNextUnreserved << "c"
2438 << "\n");
2439 return std::make_pair(MinNextUnreserved, InstanceIdx);
2440}
2441
2442/// Does this SU have a hazard within the current instruction group.
2443///
2444/// The scheduler supports two modes of hazard recognition. The first is the
2445/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2446/// supports highly complicated in-order reservation tables
2447/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2448///
2449/// The second is a streamlined mechanism that checks for hazards based on
2450/// simple counters that the scheduler itself maintains. It explicitly checks
2451/// for instruction dispatch limitations, including the number of micro-ops that
2452/// can dispatch per cycle.
2453///
2454/// TODO: Also check whether the SU must start a new group.
2455bool SchedBoundary::checkHazard(SUnit *SU) {
2456 if (HazardRec->isEnabled()
2457 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2458 return true;
2459 }
2460
2461 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2462 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2463 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2464 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2465 return true;
2466 }
2467
2468 if (CurrMOps > 0 &&
2469 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2470 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2471 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2472 << (isTop() ? "begin" : "end") << " group\n");
2473 return true;
2474 }
2475
2476 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2477 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2478 for (const MCWriteProcResEntry &PE :
2479 make_range(SchedModel->getWriteProcResBegin(SC),
2480 SchedModel->getWriteProcResEnd(SC))) {
2481 unsigned ResIdx = PE.ProcResourceIdx;
2482 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2483 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2484 unsigned NRCycle, InstanceIdx;
2485 std::tie(NRCycle, InstanceIdx) =
2486 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2487 if (NRCycle > CurrCycle) {
2488#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2489 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2490#endif
2491 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2492 << SchedModel->getResourceName(ResIdx)
2493 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2494 << "=" << NRCycle << "c\n");
2495 return true;
2496 }
2497 }
2498 }
2499 return false;
2500}
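// Editorial sketch of the counter-based part of checkHazard() (not part of
// this file): with an issue width of 4 and 3 micro-ops already issued this
// cycle, a 2-micro-op instruction is a hazard while a 1-micro-op one still
// fits. Names below are invented for the example.
namespace misched_example {
inline bool microOpHazardExample(unsigned CurrMOps, unsigned UOps,
                                 unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
}

inline void microOpHazardDemo() {
  bool TwoUOpHazard = microOpHazardExample(3, 2, 4); // true: 3 + 2 > 4
  bool OneUOpFits = microOpHazardExample(3, 1, 4);   // false: 3 + 1 <= 4
  (void)TwoUOpHazard;
  (void)OneUOpFits;
}
} // namespace misched_example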
2501
2502// Find the unscheduled node in ReadySUs with the highest latency.
2503unsigned SchedBoundary::
2504findMaxLatency(ArrayRef<SUnit *> ReadySUs) {
2505 SUnit *LateSU = nullptr;
2506 unsigned RemLatency = 0;
2507 for (SUnit *SU : ReadySUs) {
2508 unsigned L = getUnscheduledLatency(SU);
2509 if (L > RemLatency) {
2510 RemLatency = L;
2511 LateSU = SU;
2512 }
2513 }
2514 if (LateSU) {
2515 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2516 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2517 }
2518 return RemLatency;
2519}
2520
2521// Count resources in this zone and the remaining unscheduled
2522// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2523// resource index, or zero if the zone is issue limited.
2524unsigned SchedBoundary::
2525getOtherResourceCount(unsigned &OtherCritIdx) {
2526 OtherCritIdx = 0;
2527 if (!SchedModel->hasInstrSchedModel())
2528 return 0;
2529
2530 unsigned OtherCritCount = Rem->RemIssueCount
2531 + (RetiredMOps * SchedModel->getMicroOpFactor());
2532 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2533 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2534 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2535 PIdx != PEnd; ++PIdx) {
2536 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2537 if (OtherCount > OtherCritCount) {
2538 OtherCritCount = OtherCount;
2539 OtherCritIdx = PIdx;
2540 }
2541 }
2542 if (OtherCritIdx) {
2543 LLVM_DEBUG(
2544 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2545 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2546 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2547 }
2548 return OtherCritCount;
2549}
2550
2551void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2552 unsigned Idx) {
2553 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2554
2555#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2556 // ReadyCycle has been bumped up to the CurrCycle when this node was
2557 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2558 // scheduling, so may now be greater than ReadyCycle.
2559 if (ReadyCycle > CurrCycle)
2560 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2561#endif
2562
2563 if (ReadyCycle < MinReadyCycle)
2564 MinReadyCycle = ReadyCycle;
2565
2566 // Check for interlocks first. For the purpose of other heuristics, an
2567 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2568 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2569 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2570 checkHazard(SU) || (Available.size() >= ReadyListLimit);
2571
2572 if (!HazardDetected) {
2573 Available.push(SU);
2574
2575 if (InPQueue)
2576 Pending.remove(Pending.begin() + Idx);
2577 return;
2578 }
2579
2580 if (!InPQueue)
2581 Pending.push(SU);
2582}
2583
2584/// Move the boundary of scheduled code by one cycle.
2585void SchedBoundary::bumpCycle(unsigned NextCycle) {
2586 if (SchedModel->getMicroOpBufferSize() == 0) {
2587 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2588 "MinReadyCycle uninitialized");
2589 if (MinReadyCycle > NextCycle)
2590 NextCycle = MinReadyCycle;
2591 }
2592 // Update the current micro-ops, which will issue in the next cycle.
2593 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2594 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2595
2596 // Decrement DependentLatency based on the next cycle.
2597 if ((NextCycle - CurrCycle) > DependentLatency)
2598 DependentLatency = 0;
2599 else
2600 DependentLatency -= (NextCycle - CurrCycle);
2601
2602 if (!HazardRec->isEnabled()) {
2603 // Bypass HazardRec virtual calls.
2604 CurrCycle = NextCycle;
2605 } else {
2606 // Bypass getHazardType calls in case of long latency.
2607 for (; CurrCycle != NextCycle; ++CurrCycle) {
2608 if (isTop())
2609 HazardRec->AdvanceCycle();
2610 else
2611 HazardRec->RecedeCycle();
2612 }
2613 }
2614 CheckPending = true;
2615 IsResourceLimited =
2616 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2617 getScheduledLatency(), true);
2618
2619 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2620 << '\n');
2621}
2622
2623void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2624 ExecutedResCounts[PIdx] += Count;
2625 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2626 MaxExecutedResCount = ExecutedResCounts[PIdx];
2627}
2628
2629/// Add the given processor resource to this scheduled zone.
2630///
2631/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2632/// cycles during which this resource is released.
2633///
2634/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2635 /// cycles at which the resource is acquired after issue (assuming no stalls).
2636///
2637/// \return the next cycle at which the instruction may execute without
2638/// oversubscribing resources.
2639unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2640 unsigned ReleaseAtCycle,
2641 unsigned NextCycle,
2642 unsigned AcquireAtCycle) {
2643 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2644 unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2645 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2646 << ReleaseAtCycle << "x" << Factor << "u\n");
2647
2648 // Update Executed resources counts.
2649 incExecutedResources(PIdx, Count);
2650 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2651 Rem->RemainingCounts[PIdx] -= Count;
2652
2653 // Check if this resource exceeds the current critical resource. If so, it
2654 // becomes the critical resource.
2655 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2656 ZoneCritResIdx = PIdx;
2657 LLVM_DEBUG(dbgs() << " *** Critical resource "
2658 << SchedModel->getResourceName(PIdx) << ": "
2659 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2660 << "c\n");
2661 }
2662 // For reserved resources, record the highest cycle using the resource.
2663 unsigned NextAvailable, InstanceIdx;
2664 std::tie(NextAvailable, InstanceIdx) =
2665 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2666 if (NextAvailable > CurrCycle) {
2667 LLVM_DEBUG(dbgs() << " Resource conflict: "
2668 << SchedModel->getResourceName(PIdx)
2669 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2670 << " reserved until @" << NextAvailable << "\n");
2671 }
2672 return NextAvailable;
2673}
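// Editorial worked example for countResource() above (not part of this file):
// a resource with factor 3 that is acquired at cycle 0 and released at cycle 2
// contributes 3 * (2 - 0) = 6 scaled units to the zone's executed count. The
// names below are invented for the example.
namespace misched_example {
constexpr unsigned scaledResourceUnitsExample(unsigned Factor,
                                              unsigned ReleaseAtCycle,
                                              unsigned AcquireAtCycle) {
  return Factor * (ReleaseAtCycle - AcquireAtCycle);
}
static_assert(scaledResourceUnitsExample(3, 2, 0) == 6,
              "worked example from the comment above");
} // namespace misched_example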
2674
2675/// Move the boundary of scheduled code by one SUnit.
2676void SchedBoundary::bumpNode(SUnit *SU) {
2677 // Update the reservation table.
2678 if (HazardRec->isEnabled()) {
2679 if (!isTop() && SU->isCall) {
2680 // Calls are scheduled with their preceding instructions. For bottom-up
2681 // scheduling, clear the pipeline state before emitting.
2682 HazardRec->Reset();
2683 }
2684 HazardRec->EmitInstruction(SU);
2685 // Scheduling an instruction may have made pending instructions available.
2686 CheckPending = true;
2687 }
2688 // checkHazard should prevent scheduling multiple instructions per cycle that
2689 // exceed the issue width.
2690 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2691 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2692 assert(
2693 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2694 "Cannot schedule this instruction's MicroOps in the current cycle.");
2695
2696 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2697 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2698
2699 unsigned NextCycle = CurrCycle;
2700 switch (SchedModel->getMicroOpBufferSize()) {
2701 case 0:
2702 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2703 break;
2704 case 1:
2705 if (ReadyCycle > NextCycle) {
2706 NextCycle = ReadyCycle;
2707 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2708 }
2709 break;
2710 default:
2711 // We don't currently model the OOO reorder buffer, so consider all
2712 // scheduled MOps to be "retired". We do loosely model in-order resource
2713 // latency. If this instruction uses an in-order resource, account for any
2714 // likely stall cycles.
2715 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2716 NextCycle = ReadyCycle;
2717 break;
2718 }
2719 RetiredMOps += IncMOps;
2720
2721 // Update resource counts and critical resource.
2722 if (SchedModel->hasInstrSchedModel()) {
2723 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2724 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2725 Rem->RemIssueCount -= DecRemIssue;
2726 if (ZoneCritResIdx) {
2727 // Scale scheduled micro-ops for comparing with the critical resource.
2728 unsigned ScaledMOps =
2729 RetiredMOps * SchedModel->getMicroOpFactor();
2730
2731 // If scaled micro-ops are now more than the previous critical resource by
2732 // a full cycle, then micro-ops issue becomes critical.
2733 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2734 >= (int)SchedModel->getLatencyFactor()) {
2735 ZoneCritResIdx = 0;
2736 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2737 << ScaledMOps / SchedModel->getLatencyFactor()
2738 << "c\n");
2739 }
2740 }
2741 for (TargetSchedModel::ProcResIter
2742 PI = SchedModel->getWriteProcResBegin(SC),
2743 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2744 unsigned RCycle =
2745 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2746 PI->AcquireAtCycle);
2747 if (RCycle > NextCycle)
2748 NextCycle = RCycle;
2749 }
2750 if (SU->hasReservedResource) {
2751 // For reserved resources, record the highest cycle using the resource.
2752 // For top-down scheduling, this is the cycle in which we schedule this
2753 // instruction plus the number of cycles the operations reserves the
2754 // resource. For bottom-up, it is simply the instruction's cycle.
2755 for (TargetSchedModel::ProcResIter
2756 PI = SchedModel->getWriteProcResBegin(SC),
2757 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2758 unsigned PIdx = PI->ProcResourceIdx;
2759 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2760
2761 if (SchedModel && SchedModel->enableIntervals()) {
2762 unsigned ReservedUntil, InstanceIdx;
2763 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2764 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2765 if (isTop()) {
2766 ReservedResourceSegments[InstanceIdx].add(
2767 ResourceSegments::getResourceIntervalTop(
2768 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2769 MIResourceCutOff);
2770 } else {
2771 ReservedResourceSegments[InstanceIdx].add(
2772 ResourceSegments::getResourceIntervalBottom(
2773 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2774 MIResourceCutOff);
2775 }
2776 } else {
2777
2778 unsigned ReservedUntil, InstanceIdx;
2779 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2780 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2781 if (isTop()) {
2782 ReservedCycles[InstanceIdx] =
2783 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2784 } else
2785 ReservedCycles[InstanceIdx] = NextCycle;
2786 }
2787 }
2788 }
2789 }
2790 }
2791 // Update ExpectedLatency and DependentLatency.
2792 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2793 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2794 if (SU->getDepth() > TopLatency) {
2795 TopLatency = SU->getDepth();
2796 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2797 << SU->NodeNum << ") " << TopLatency << "c\n");
2798 }
2799 if (SU->getHeight() > BotLatency) {
2800 BotLatency = SU->getHeight();
2801 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2802 << SU->NodeNum << ") " << BotLatency << "c\n");
2803 }
2804 // If we stall for any reason, bump the cycle.
2805 if (NextCycle > CurrCycle)
2806 bumpCycle(NextCycle);
2807 else
2808 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2809 // resource limited. If a stall occurred, bumpCycle does this.
2810 IsResourceLimited =
2811 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2812 getScheduledLatency(), true);
2813
2814 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2815 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2816 // one cycle. Since we commonly reach the max MOps here, opportunistically
2817 // bump the cycle to avoid uselessly checking everything in the readyQ.
2818 CurrMOps += IncMOps;
2819
2820 // Bump the cycle count for issue group constraints.
2821 // This must be done after NextCycle has been adjusted for all other stalls.
2822 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2823 // currCycle to X.
2824 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2825 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2826 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2827 << " group\n");
2828 bumpCycle(++NextCycle);
2829 }
2830
2831 while (CurrMOps >= SchedModel->getIssueWidth()) {
2832 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2833 << CurrCycle << '\n');
2834 bumpCycle(++NextCycle);
2835 }
2836 LLVM_DEBUG(dumpScheduledState());
2837}
2838
2839 /// Release pending ready nodes into the available queue. This makes them
2840/// visible to heuristics.
2841void SchedBoundary::releasePending() {
2842 // If the available queue is empty, it is safe to reset MinReadyCycle.
2843 if (Available.empty())
2844 MinReadyCycle = std::numeric_limits<unsigned>::max();
2845
2846 // Check to see if any of the pending instructions are ready to issue. If
2847 // so, add them to the available queue.
2848 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2849 SUnit *SU = *(Pending.begin() + I);
2850 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2851
2852 if (ReadyCycle < MinReadyCycle)
2853 MinReadyCycle = ReadyCycle;
2854
2855 if (Available.size() >= ReadyListLimit)
2856 break;
2857
2858 releaseNode(SU, ReadyCycle, true, I);
2859 if (E != Pending.size()) {
2860 --I;
2861 --E;
2862 }
2863 }
2864 CheckPending = false;
2865}
2866
2867/// Remove SU from the ready set for this boundary.
2868void SchedBoundary::removeReady(SUnit *SU) {
2869 if (Available.isInQueue(SU))
2870 Available.remove(Available.find(SU));
2871 else {
2872 assert(Pending.isInQueue(SU) && "bad ready count");
2873 Pending.remove(Pending.find(SU));
2874 }
2875}
2876
2877/// If this queue only has one ready candidate, return it. As a side effect,
2878/// defer any nodes that now hit a hazard, and advance the cycle until at least
2879/// one node is ready. If multiple instructions are ready, return NULL.
2880SUnit *SchedBoundary::pickOnlyChoice() {
2881 if (CheckPending)
2882 releasePending();
2883
2884 // Defer any ready instrs that now have a hazard.
2885 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2886 if (checkHazard(*I)) {
2887 Pending.push(*I);
2888 I = Available.remove(I);
2889 continue;
2890 }
2891 ++I;
2892 }
2893 for (unsigned i = 0; Available.empty(); ++i) {
2894// FIXME: Re-enable assert once PR20057 is resolved.
2895// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2896// "permanent hazard");
2897 (void)i;
2898 bumpCycle(CurrCycle + 1);
2899 releasePending();
2900 }
2901
2902 LLVM_DEBUG(Pending.dump());
2903 LLVM_DEBUG(Available.dump());
2904
2905 if (Available.size() == 1)
2906 return *Available.begin();
2907 return nullptr;
2908}
2909
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911
2912/// Dump the content of the \ref ReservedCycles vector for the
2913/// resources that are used in the basic block.
2914///
2915LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2916 if (!SchedModel->hasInstrSchedModel())
2917 return;
2918
2919 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2920 unsigned StartIdx = 0;
2921
2922 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2923 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2924 std::string ResName = SchedModel->getResourceName(ResIdx);
2925 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2926 dbgs() << ResName << "(" << UnitIdx << ") = ";
2927 if (SchedModel && SchedModel->enableIntervals()) {
2928 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2929 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2930 else
2931 dbgs() << "{ }\n";
2932 } else
2933 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2934 }
2935 StartIdx += NumUnits;
2936 }
2937}
2938
2939// This is useful information to dump after bumpNode.
2940// Note that the Queue contents are more useful before pickNodeFromQueue.
2941LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2942 unsigned ResFactor;
2943 unsigned ResCount;
2944 if (ZoneCritResIdx) {
2945 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2946 ResCount = getResourceCount(ZoneCritResIdx);
2947 } else {
2948 ResFactor = SchedModel->getMicroOpFactor();
2949 ResCount = RetiredMOps * ResFactor;
2950 }
2951 unsigned LFactor = SchedModel->getLatencyFactor();
2952 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2953 << " Retired: " << RetiredMOps;
2954 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2955 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2956 << ResCount / ResFactor << " "
2957 << SchedModel->getResourceName(ZoneCritResIdx)
2958 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2959 << (IsResourceLimited ? " - Resource" : " - Latency")
2960 << " limited.\n";
2961 if (MISchedDumpReservedCycles)
2962 dumpReservedCycles();
2963}
2964#endif
2965
2966//===----------------------------------------------------------------------===//
2967// GenericScheduler - Generic implementation of MachineSchedStrategy.
2968//===----------------------------------------------------------------------===//
2969
2970void GenericSchedulerBase::SchedCandidate::
2971initResourceDelta(const ScheduleDAGMI *DAG,
2972 const TargetSchedModel *SchedModel) {
2973 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2974 return;
2975
2976 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2977 for (TargetSchedModel::ProcResIter
2978 PI = SchedModel->getWriteProcResBegin(SC),
2979 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2980 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2981 ResDelta.CritResources += PI->ReleaseAtCycle;
2982 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2983 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2984 }
2985}
2986
2987/// Compute remaining latency. We need this both to determine whether the
2988/// overall schedule has become latency-limited and whether the instructions
2989/// outside this zone are resource or latency limited.
2990///
2991/// The "dependent" latency is updated incrementally during scheduling as the
2992/// max height/depth of scheduled nodes minus the cycles since it was
2993/// scheduled:
2994/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2995///
2996/// The "independent" latency is the max ready queue depth:
2997/// ILat = max N.depth for N in Available|Pending
2998///
2999/// RemainingLatency is the greater of independent and dependent latency.
3000///
3001/// These computations are expensive, especially in DAGs with many edges, so
3002/// only do them if necessary.
3003static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3004 unsigned RemLatency = CurrZone.getDependentLatency();
3005 RemLatency = std::max(RemLatency,
3006 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3007 RemLatency = std::max(RemLatency,
3008 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3009 return RemLatency;
3010}
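// Editorial worked example for computeRemLatency() above (not part of this
// file): the remaining latency is the larger of the zone's dependent latency
// and the deepest node still sitting in the Available/Pending queues. The
// names below are invented for the example.
#include <algorithm>
#include <initializer_list>

namespace misched_example {
inline unsigned remLatencyExample(unsigned DependentLatency,
                                  std::initializer_list<unsigned> QueueDepths) {
  unsigned Rem = DependentLatency;
  for (unsigned D : QueueDepths)
    Rem = std::max(Rem, D);
  return Rem;
}
// With a dependent latency of 7 and queue depths {3, 9, 5}, RemLatency is 9.
inline const unsigned RemLatencyDemo = remLatencyExample(7, {3, 9, 5});
} // namespace misched_example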
3011
3012 /// Returns true if the current cycle plus remaining latency is greater than
3013/// the critical path in the scheduling region.
3014bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3015 SchedBoundary &CurrZone,
3016 bool ComputeRemLatency,
3017 unsigned &RemLatency) const {
3018 // The current cycle is already greater than the critical path, so we are
3019 // already latency limited and don't need to compute the remaining latency.
3020 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3021 return true;
3022
3023 // If we haven't scheduled anything yet, then we aren't latency limited.
3024 if (CurrZone.getCurrCycle() == 0)
3025 return false;
3026
3027 if (ComputeRemLatency)
3028 RemLatency = computeRemLatency(CurrZone);
3029
3030 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3031}
3032
3033 /// Set the CandPolicy for a scheduling zone given the current resources and
3034 /// latencies inside and outside the zone.
3035void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3036 SchedBoundary &CurrZone,
3037 SchedBoundary *OtherZone) {
3038 // Apply preemptive heuristics based on the total latency and resources
3039 // inside and outside this zone. Potential stalls should be considered before
3040 // following this policy.
3041
3042 // Compute the critical resource outside the zone.
3043 unsigned OtherCritIdx = 0;
3044 unsigned OtherCount =
3045 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3046
3047 bool OtherResLimited = false;
3048 unsigned RemLatency = 0;
3049 bool RemLatencyComputed = false;
3050 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3051 RemLatency = computeRemLatency(CurrZone);
3052 RemLatencyComputed = true;
3053 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3054 OtherCount, RemLatency, false);
3055 }
3056
3057 // Schedule aggressively for latency in PostRA mode. We don't check for
3058 // acyclic latency during PostRA, and highly out-of-order processors will
3059 // skip PostRA scheduling.
3060 if (!OtherResLimited &&
3061 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3062 RemLatency))) {
3063 Policy.ReduceLatency |= true;
3064 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3065 << " RemainingLatency " << RemLatency << " + "
3066 << CurrZone.getCurrCycle() << "c > CritPath "
3067 << Rem.CriticalPath << "\n");
3068 }
3069 // If the same resource is limiting inside and outside the zone, do nothing.
3070 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3071 return;
3072
3073 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3074 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3075 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3076 } if (OtherResLimited) dbgs()
3077 << " RemainingLimit: "
3078 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3079 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3080 << " Latency limited both directions.\n");
3081
3082 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3083 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3084
3085 if (OtherResLimited)
3086 Policy.DemandResIdx = OtherCritIdx;
3087}
3088
3089#ifndef NDEBUG
3090const char *GenericSchedulerBase::getReasonStr(
3091 GenericSchedulerBase::CandReason Reason) {
3092 switch (Reason) {
3093 case NoCand: return "NOCAND ";
3094 case Only1: return "ONLY1 ";
3095 case PhysReg: return "PHYS-REG ";
3096 case RegExcess: return "REG-EXCESS";
3097 case RegCritical: return "REG-CRIT ";
3098 case Stall: return "STALL ";
3099 case Cluster: return "CLUSTER ";
3100 case Weak: return "WEAK ";
3101 case RegMax: return "REG-MAX ";
3102 case ResourceReduce: return "RES-REDUCE";
3103 case ResourceDemand: return "RES-DEMAND";
3104 case TopDepthReduce: return "TOP-DEPTH ";
3105 case TopPathReduce: return "TOP-PATH ";
3106 case BotHeightReduce:return "BOT-HEIGHT";
3107 case BotPathReduce: return "BOT-PATH ";
3108 case NextDefUse: return "DEF-USE ";
3109 case NodeOrder: return "ORDER ";
3110 };
3111 llvm_unreachable("Unknown reason!");
3112}
3113
3114void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3115 PressureChange P;
3116 unsigned ResIdx = 0;
3117 unsigned Latency = 0;
3118 switch (Cand.Reason) {
3119 default:
3120 break;
3121 case RegExcess:
3122 P = Cand.RPDelta.Excess;
3123 break;
3124 case RegCritical:
3125 P = Cand.RPDelta.CriticalMax;
3126 break;
3127 case RegMax:
3128 P = Cand.RPDelta.CurrentMax;
3129 break;
3130 case ResourceReduce:
3131 ResIdx = Cand.Policy.ReduceResIdx;
3132 break;
3133 case ResourceDemand:
3134 ResIdx = Cand.Policy.DemandResIdx;
3135 break;
3136 case TopDepthReduce:
3137 Latency = Cand.SU->getDepth();
3138 break;
3139 case TopPathReduce:
3140 Latency = Cand.SU->getHeight();
3141 break;
3142 case BotHeightReduce:
3143 Latency = Cand.SU->getHeight();
3144 break;
3145 case BotPathReduce:
3146 Latency = Cand.SU->getDepth();
3147 break;
3148 }
3149 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3150 if (P.isValid())
3151 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3152 << ":" << P.getUnitInc() << " ";
3153 else
3154 dbgs() << " ";
3155 if (ResIdx)
3156 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3157 else
3158 dbgs() << " ";
3159 if (Latency)
3160 dbgs() << " " << Latency << " cycles ";
3161 else
3162 dbgs() << " ";
3163 dbgs() << '\n';
3164}
3165#endif
3166
3167namespace llvm {
3168/// Return true if this heuristic determines order.
3169/// TODO: Consider refactoring the return type of these functions to an integer
3170/// or enum, as we may need to differentiate whether TryCand is better than Cand.
3171bool tryLess(int TryVal, int CandVal,
3172 GenericSchedulerBase::SchedCandidate &TryCand,
3173 GenericSchedulerBase::SchedCandidate &Cand,
3174 GenericSchedulerBase::CandReason Reason) {
3175 if (TryVal < CandVal) {
3176 TryCand.Reason = Reason;
3177 return true;
3178 }
3179 if (TryVal > CandVal) {
3180 if (Cand.Reason > Reason)
3181 Cand.Reason = Reason;
3182 return true;
3183 }
3184 return false;
3185}
3186
3187bool tryGreater(int TryVal, int CandVal,
3188 GenericSchedulerBase::SchedCandidate &TryCand,
3189 GenericSchedulerBase::SchedCandidate &Cand,
3190 GenericSchedulerBase::CandReason Reason) {
3191 if (TryVal > CandVal) {
3192 TryCand.Reason = Reason;
3193 return true;
3194 }
3195 if (TryVal < CandVal) {
3196 if (Cand.Reason > Reason)
3197 Cand.Reason = Reason;
3198 return true;
3199 }
3200 return false;
3201}
3202
3203bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3204 GenericSchedulerBase::SchedCandidate &Cand,
3205 SchedBoundary &Zone) {
3206 if (Zone.isTop()) {
3207 // Prefer the candidate with the lesser depth, but only if one of them has
3208 // depth greater than the total latency scheduled so far, otherwise either
3209 // of them could be scheduled now with no stall.
3210 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3211 Zone.getScheduledLatency()) {
3212 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3213 TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3214 return true;
3215 }
3216 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3217 TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3218 return true;
3219 } else {
3220 // Prefer the candidate with the lesser height, but only if one of them has
3221 // height greater than the total latency scheduled so far, otherwise either
3222 // of them could be scheduled now with no stall.
3223 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3224 Zone.getScheduledLatency()) {
3225 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3226 TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3227 return true;
3228 }
3229 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3230 TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3231 return true;
3232 }
3233 return false;
3234}
3235} // end namespace llvm
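// Editorial sketch of the tryLess()/tryGreater() contract above (not part of
// this file): the helpers return true whenever the two values differ, i.e. the
// heuristic has decided the ordering; only a tie (false) falls through to the
// next heuristic. Reasons are modelled here as plain ints, smaller = stronger;
// ExampleCand and the function name are invented for the example.
namespace misched_example {
struct ExampleCand {
  int Reason = 100; // stands in for NoCand
};

inline bool tryLessExample(int TryVal, int CandVal, ExampleCand &TryCand,
                           ExampleCand &Cand, int Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason; // TryCand wins for this Reason
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;  // Cand stays, but remember the deciding Reason
    return true;
  }
  return false;              // tie: let the next heuristic decide
}
} // namespace misched_example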
3236
3237static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3238 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3239 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3240}
3241
3242static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3243 tracePick(Cand.Reason, Cand.AtTop);
3244}
3245
3246void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3247 assert(dag->hasVRegLiveness() &&
3248 "(PreRA)GenericScheduler needs vreg liveness");
3249 DAG = static_cast<ScheduleDAGMILive*>(dag);
3250 SchedModel = DAG->getSchedModel();
3251 TRI = DAG->TRI;
3252
3253 if (RegionPolicy.ComputeDFSResult)
3254 DAG->computeDFSResult();
3255
3256 Rem.init(DAG, SchedModel);
3257 Top.init(DAG, SchedModel, &Rem);
3258 Bot.init(DAG, SchedModel, &Rem);
3259
3260 // Initialize resource counts.
3261
3262 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3263 // are disabled, then these HazardRecs will be disabled.
3264 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3265 if (!Top.HazardRec) {
3266 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3267 }
3268 if (!Bot.HazardRec) {
3269 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3270 }
3271 TopCand.SU = nullptr;
3272 BotCand.SU = nullptr;
3273}
3274
3275/// Initialize the per-region scheduling policy.
3276void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3277 MachineBasicBlock::iterator End,
3278 unsigned NumRegionInstrs) {
3279 const MachineFunction &MF = *Begin->getMF();
3280 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3281
3282 // Avoid setting up the register pressure tracker for small regions to save
3283 // compile time. As a rough heuristic, only track pressure when the number of
3284 // schedulable instructions exceeds half the allocatable integer register file
3285 // that is the largest legal integer register type.
3286 RegionPolicy.ShouldTrackPressure = true;
3287 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3288 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3289 if (TLI->isTypeLegal(LegalIntVT)) {
3290 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3291 TLI->getRegClassFor(LegalIntVT));
3292 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3293 break;
3294 }
3295 }
3296
3297 // For generic targets, we default to bottom-up, because it's simpler and more
3298 // compile-time optimizations have been implemented in that direction.
3299 RegionPolicy.OnlyBottomUp = true;
3300
3301 // Allow the subtarget to override default policy.
3302 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3303
3304 // After subtarget overrides, apply command line options.
3305 if (!EnableRegPressure) {
3306 RegionPolicy.ShouldTrackPressure = false;
3307 RegionPolicy.ShouldTrackLaneMasks = false;
3308 }
3309
3310 if (PreRADirection == MISched::TopDown) {
3311 RegionPolicy.OnlyTopDown = true;
3312 RegionPolicy.OnlyBottomUp = false;
3313 } else if (PreRADirection == MISched::BottomUp) {
3314 RegionPolicy.OnlyTopDown = false;
3315 RegionPolicy.OnlyBottomUp = true;
3316 } else if (PreRADirection == MISched::Bidirectional) {
3317 RegionPolicy.OnlyBottomUp = false;
3318 RegionPolicy.OnlyTopDown = false;
3319 }
3320}
3321
3322void GenericScheduler::dumpPolicy() const {
3323 // Cannot completely remove virtual function even in release mode.
3324#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3325 dbgs() << "GenericScheduler RegionPolicy: "
3326 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3327 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3328 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3329 << "\n";
3330#endif
3331}
3332
3333/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3334/// critical path by more cycles than it takes to drain the instruction buffer.
3335/// We estimate an upper bounds on in-flight instructions as:
3336///
3337/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3338/// InFlightIterations = AcyclicPath / CyclesPerIteration
3339/// InFlightResources = InFlightIterations * LoopResources
3340///
3341/// TODO: Check execution resources in addition to IssueCount.
3342void GenericScheduler::checkAcyclicLatency() {
3343 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3344 return;
3345
3346 // Scaled number of cycles per loop iteration.
3347 unsigned IterCount =
3348 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3349 Rem.RemIssueCount);
3350 // Scaled acyclic critical path.
3351 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3352 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3353 unsigned InFlightCount =
3354 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3355 unsigned BufferLimit =
3357 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3358 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3359
3360 LLVM_DEBUG(
3361 dbgs() << "IssueCycles="
3363 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3364 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3365 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3366 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3367 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3368}
3369
3370void GenericScheduler::registerRoots() {
3371 Rem.CriticalPath = DAG->ExitSU.getDepth();
3372
3373 // Some roots may not feed into ExitSU. Check all of them in case.
3374 for (const SUnit *SU : Bot.Available) {
3375 if (SU->getDepth() > Rem.CriticalPath)
3376 Rem.CriticalPath = SU->getDepth();
3377 }
3378 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3379 if (DumpCriticalPathLength) {
3380 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3381 }
3382
3383 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3384 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3385 checkAcyclicLatency();
3386 }
3387}
3388
3389namespace llvm {
3390bool tryPressure(const PressureChange &TryP,
3391 const PressureChange &CandP,
3392 GenericSchedulerBase::SchedCandidate &TryCand,
3393 GenericSchedulerBase::SchedCandidate &Cand,
3394 GenericSchedulerBase::CandReason Reason,
3395 const TargetRegisterInfo *TRI,
3396 const MachineFunction &MF) {
3397 // If one candidate decreases and the other increases, go with it.
3398 // Invalid candidates have UnitInc==0.
3399 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3400 Reason)) {
3401 return true;
3402 }
3403 // Do not compare the magnitude of pressure changes between top and bottom
3404 // boundary.
3405 if (Cand.AtTop != TryCand.AtTop)
3406 return false;
3407
3408 // If both candidates affect the same set in the same boundary, go with the
3409 // smallest increase.
3410 unsigned TryPSet = TryP.getPSetOrMax();
3411 unsigned CandPSet = CandP.getPSetOrMax();
3412 if (TryPSet == CandPSet) {
3413 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3414 Reason);
3415 }
3416
3417 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3418 std::numeric_limits<int>::max();
3419
3420 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3421 std::numeric_limits<int>::max();
3422
3423 // If the candidates are decreasing pressure, reverse priority.
3424 if (TryP.getUnitInc() < 0)
3425 std::swap(TryRank, CandRank);
3426 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3427}
3428
3429unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3430 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3431}
3432
3433/// Minimize physical register live ranges. Regalloc wants them adjacent to
3434/// their physreg def/use.
3435///
3436/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3437/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3438/// with the operation that produces or consumes the physreg. We'll do this when
3439/// regalloc has support for parallel copies.
3440int biasPhysReg(const SUnit *SU, bool isTop) {
3441 const MachineInstr *MI = SU->getInstr();
3442
3443 if (MI->isCopy()) {
3444 unsigned ScheduledOper = isTop ? 1 : 0;
3445 unsigned UnscheduledOper = isTop ? 0 : 1;
3446 // If we have already scheduled the physreg produce/consumer, immediately
3447 // schedule the copy.
3448 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3449 return 1;
3450 // If the physreg is at the boundary, defer it. Otherwise schedule it
3451 // immediately to free the dependent. We can hoist the copy later.
3452 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3453 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3454 return AtBoundary ? -1 : 1;
3455 }
3456
3457 if (MI->isMoveImmediate()) {
3458 // If we have a move immediate and all successors have been assigned, bias
3459 // towards scheduling this later. Make sure all register defs are to
3460 // physical registers.
3461 bool DoBias = true;
3462 for (const MachineOperand &Op : MI->defs()) {
3463 if (Op.isReg() && !Op.getReg().isPhysical()) {
3464 DoBias = false;
3465 break;
3466 }
3467 }
3468
3469 if (DoBias)
3470 return isTop ? -1 : 1;
3471 }
3472
3473 return 0;
3474}
3475} // end namespace llvm
3476
3477void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3478 bool AtTop,
3479 const RegPressureTracker &RPTracker,
3480 RegPressureTracker &TempTracker) {
3481 Cand.SU = SU;
3482 Cand.AtTop = AtTop;
3483 if (DAG->isTrackingPressure()) {
3484 if (AtTop) {
3485 TempTracker.getMaxDownwardPressureDelta(
3486 Cand.SU->getInstr(),
3487 Cand.RPDelta,
3488 DAG->getRegionCriticalPSets(),
3489 DAG->getRegPressure().MaxSetPressure);
3490 } else {
3491 if (VerifyScheduling) {
3492 TempTracker.getMaxUpwardPressureDelta(
3493 Cand.SU->getInstr(),
3494 &DAG->getPressureDiff(Cand.SU),
3495 Cand.RPDelta,
3496 DAG->getRegionCriticalPSets(),
3497 DAG->getRegPressure().MaxSetPressure);
3498 } else {
3499 RPTracker.getUpwardPressureDelta(
3500 Cand.SU->getInstr(),
3501 DAG->getPressureDiff(Cand.SU),
3502 Cand.RPDelta,
3503 DAG->getRegionCriticalPSets(),
3504 DAG->getRegPressure().MaxSetPressure);
3505 }
3506 }
3507 }
3508 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3509 << " Try SU(" << Cand.SU->NodeNum << ") "
3510 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3511 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3512}
3513
3514/// Apply a set of heuristics to a new candidate. Heuristics are currently
3515/// hierarchical. This may be more efficient than a graduated cost model because
3516/// we don't need to evaluate all aspects of the model for each node in the
3517/// queue. But it's really done to make the heuristics easier to debug and
3518/// statistically analyze.
3519///
3520/// \param Cand provides the policy and current best candidate.
3521/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3522/// \param Zone describes the scheduled zone that we are extending, or nullptr
3523/// if Cand is from a different zone than TryCand.
3524/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3525bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3526 SchedCandidate &TryCand,
3527 SchedBoundary *Zone) const {
3528 // Initialize the candidate if needed.
3529 if (!Cand.isValid()) {
3530 TryCand.Reason = NodeOrder;
3531 return true;
3532 }
3533
3534 // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3535 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3536 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3537 return TryCand.Reason != NoCand;
3538
3539 // Avoid exceeding the target's limit.
3540 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3541 Cand.RPDelta.Excess,
3542 TryCand, Cand, RegExcess, TRI,
3543 DAG->MF))
3544 return TryCand.Reason != NoCand;
3545
3546 // Avoid increasing the max critical pressure in the scheduled region.
3547 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3548 Cand.RPDelta.CriticalMax,
3549 TryCand, Cand, RegCritical, TRI,
3550 DAG->MF))
3551 return TryCand.Reason != NoCand;
3552
3553 // We only compare a subset of features when comparing nodes between
3554 // Top and Bottom boundary. Some properties are simply incomparable; in many
3555 // other instances we should only override the other boundary if something
3556 // is a clearly good pick on one boundary. Skip heuristics that are more
3557 // "tie-breaking" in nature.
3558 bool SameBoundary = Zone != nullptr;
3559 if (SameBoundary) {
3560 // For loops that are acyclic path limited, aggressively schedule for
3561 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3562 // heuristics to take precedence.
3563 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3564 tryLatency(TryCand, Cand, *Zone))
3565 return TryCand.Reason != NoCand;
3566
3567 // Prioritize instructions that read unbuffered resources by stall cycles.
3568 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3569 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3570 return TryCand.Reason != NoCand;
3571 }
3572
3573 // Keep clustered nodes together to encourage downstream peephole
3574 // optimizations which may reduce resource requirements.
3575 //
3576 // This is a best effort to set things up for a post-RA pass. Optimizations
3577 // like generating loads of multiple registers should ideally be done within
3578 // the scheduler pass by combining the loads during DAG postprocessing.
3579 const SUnit *CandNextClusterSU =
3580 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3581 const SUnit *TryCandNextClusterSU =
3582 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3583 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3584 Cand.SU == CandNextClusterSU,
3585 TryCand, Cand, Cluster))
3586 return TryCand.Reason != NoCand;
3587
3588 if (SameBoundary) {
3589 // Weak edges are for clustering and other constraints.
3590 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3591 getWeakLeft(Cand.SU, Cand.AtTop),
3592 TryCand, Cand, Weak))
3593 return TryCand.Reason != NoCand;
3594 }
3595
3596 // Avoid increasing the max pressure of the entire region.
3597 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3598 Cand.RPDelta.CurrentMax,
3599 TryCand, Cand, RegMax, TRI,
3600 DAG->MF))
3601 return TryCand.Reason != NoCand;
3602
3603 if (SameBoundary) {
3604 // Avoid critical resource consumption and balance the schedule.
3605 TryCand.initResourceDelta(DAG, SchedModel);
3606 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3607 TryCand, Cand, ResourceReduce))
3608 return TryCand.Reason != NoCand;
3609 if (tryGreater(TryCand.ResDelta.DemandedResources,
3610 Cand.ResDelta.DemandedResources,
3611 TryCand, Cand, ResourceDemand))
3612 return TryCand.Reason != NoCand;
3613
3614 // Avoid serializing long latency dependence chains.
3615 // For acyclic path limited loops, latency was already checked above.
3617 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3618 return TryCand.Reason != NoCand;
3619
3620 // Fall through to original instruction order.
3621 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3622 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3623 TryCand.Reason = NodeOrder;
3624 return true;
3625 }
3626 }
3627
3628 return false;
3629}
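The heuristics above are hierarchical: the first criterion that separates the two candidates decides, and only exact ties fall through to the next check, ending at original node order. A minimal standalone sketch of that pattern (not part of this file; ToyCand, StallCycles, and toyTryCandidate are invented stand-ins for the real SchedCandidate fields):

#include <iostream>

struct ToyCand {
  int StallCycles; // lower is better, checked first
  int NodeNum;     // original instruction order, the final tie-breaker
};

// Returns true if Try should replace Cand, mirroring the "first decisive
// criterion wins" structure of tryCandidate().
static bool toyTryCandidate(const ToyCand &Cand, const ToyCand &Try) {
  if (Try.StallCycles != Cand.StallCycles)
    return Try.StallCycles < Cand.StallCycles; // decisive: stop here
  return Try.NodeNum < Cand.NodeNum;           // fall through to node order
}

int main() {
  ToyCand Best{2, 7};
  ToyCand Next{1, 9};
  if (toyTryCandidate(Best, Next))
    Best = Next; // Next wins on the higher-priority criterion
  std::cout << Best.NodeNum << '\n'; // prints 9
}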
3630
3631/// Pick the best candidate from the queue.
3632///
3633/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3634/// DAG building. To adjust for the current scheduling location we need to
3635/// maintain the number of vreg uses remaining to be top-scheduled.
3636void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3637 const CandPolicy &ZonePolicy,
3638 const RegPressureTracker &RPTracker,
3639 SchedCandidate &Cand) {
3640 // getMaxPressureDelta temporarily modifies the tracker.
3641 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3642
3643 ReadyQueue &Q = Zone.Available;
3644 for (SUnit *SU : Q) {
3645
3646 SchedCandidate TryCand(ZonePolicy);
3647 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3648 // Pass SchedBoundary only when comparing nodes from the same boundary.
3649 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3650 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3651 // Initialize resource delta if needed in case future heuristics query it.
3652 if (TryCand.ResDelta == SchedResourceDelta())
3653 TryCand.initResourceDelta(DAG, SchedModel);
3654 Cand.setBest(TryCand);
3655 LLVM_DEBUG(traceCandidate(Cand));
3656 }
3657 }
3658}
3659
3660/// Pick the best candidate node from either the top or bottom queue.
3661SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3662 // Schedule as far as possible in the direction of no choice. This is most
3663 // efficient, but also provides the best heuristics for CriticalPSets.
3664 if (SUnit *SU = Bot.pickOnlyChoice()) {
3665 IsTopNode = false;
3666 tracePick(Only1, false);
3667 return SU;
3668 }
3669 if (SUnit *SU = Top.pickOnlyChoice()) {
3670 IsTopNode = true;
3671 tracePick(Only1, true);
3672 return SU;
3673 }
3674 // Set the bottom-up policy based on the state of the current bottom zone and
3675 // the instructions outside the zone, including the top zone.
3676 CandPolicy BotPolicy;
3677 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3678 // Set the top-down policy based on the state of the current top zone and
3679 // the instructions outside the zone, including the bottom zone.
3680 CandPolicy TopPolicy;
3681 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3682
3683 // See if BotCand is still valid (because we previously scheduled from Top).
3684 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3685 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3686 BotCand.Policy != BotPolicy) {
3687 BotCand.reset(CandPolicy());
3688 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3689 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3690 } else {
3691 LLVM_DEBUG(traceCandidate(BotCand));
3692#ifndef NDEBUG
3693 if (VerifyScheduling) {
3694 SchedCandidate TCand;
3695 TCand.reset(CandPolicy());
3696 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3697 assert(TCand.SU == BotCand.SU &&
3698 "Last pick result should correspond to re-picking right now");
3699 }
3700#endif
3701 }
3702
3703 // Check if the top Q has a better candidate.
3704 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3705 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3706 TopCand.Policy != TopPolicy) {
3707 TopCand.reset(CandPolicy());
3708 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3709 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3710 } else {
3711 LLVM_DEBUG(traceCandidate(TopCand));
3712#ifndef NDEBUG
3713 if (VerifyScheduling) {
3714 SchedCandidate TCand;
3715 TCand.reset(CandPolicy());
3716 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3717 assert(TCand.SU == TopCand.SU &&
3718 "Last pick result should correspond to re-picking right now");
3719 }
3720#endif
3721 }
3722
3723 // Pick best from BotCand and TopCand.
3724 assert(BotCand.isValid());
3725 assert(TopCand.isValid());
3726 SchedCandidate Cand = BotCand;
3727 TopCand.Reason = NoCand;
3728 if (tryCandidate(Cand, TopCand, nullptr)) {
3729 Cand.setBest(TopCand);
3730 LLVM_DEBUG(traceCandidate(Cand));
3731 }
3732
3733 IsTopNode = Cand.AtTop;
3734 tracePick(Cand);
3735 return Cand.SU;
3736}
3737
3738/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3739SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3740 if (DAG->top() == DAG->bottom()) {
3741 assert(Top.Available.empty() && Top.Pending.empty() &&
3742 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3743 return nullptr;
3744 }
3745 SUnit *SU;
3746 do {
3747 if (RegionPolicy.OnlyTopDown) {
3748 SU = Top.pickOnlyChoice();
3749 if (!SU) {
3750 CandPolicy NoPolicy;
3751 TopCand.reset(NoPolicy);
3752 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3753 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3754 tracePick(TopCand);
3755 SU = TopCand.SU;
3756 }
3757 IsTopNode = true;
3758 } else if (RegionPolicy.OnlyBottomUp) {
3759 SU = Bot.pickOnlyChoice();
3760 if (!SU) {
3761 CandPolicy NoPolicy;
3762 BotCand.reset(NoPolicy);
3763 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3764 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3765 tracePick(BotCand);
3766 SU = BotCand.SU;
3767 }
3768 IsTopNode = false;
3769 } else {
3770 SU = pickNodeBidirectional(IsTopNode);
3771 }
3772 } while (SU->isScheduled);
3773
3774 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
3775 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
3776 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
3777 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
3778 //
3779 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
3780 // isBottomReady. That is, it didn't factor into the decision to choose SU
3781 // because it isTopReady or isBottomReady, respectively. In fact, if the
3782 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
3783 // queues respectively contain the original roots and don't get updated when
3784 // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
3785 // because we scheduled everything but the top roots. Conversely, if SU
3786 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
3787 // but the bottom roots. If it's in a queue even coincidentally, it should be
3788 // removed so it does not get re-picked in a subsequent pickNode call.
3789 if (SU->isTopReady())
3790 Top.removeReady(SU);
3791 if (SU->isBottomReady())
3792 Bot.removeReady(SU);
3793
3794 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3795 << *SU->getInstr());
3796 return SU;
3797}
3798
3799void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3800 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3801 if (!isTop)
3802 ++InsertPos;
3803 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3804
3805 // Find already scheduled copies with a single physreg dependence and move
3806 // them just above the scheduled instruction.
3807 for (SDep &Dep : Deps) {
3808 if (Dep.getKind() != SDep::Data ||
3809 !Register::isPhysicalRegister(Dep.getReg()))
3810 continue;
3811 SUnit *DepSU = Dep.getSUnit();
3812 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3813 continue;
3814 MachineInstr *Copy = DepSU->getInstr();
3815 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3816 continue;
3817 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3818 DAG->dumpNode(*Dep.getSUnit()));
3819 DAG->moveInstruction(Copy, InsertPos);
3820 }
3821}
3822
3823/// Update the scheduler's state after scheduling a node. This is the same node
3824/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3825 /// update its state based on the current cycle before MachineSchedStrategy
3826/// does.
3827///
3828/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3829/// them here. See comments in biasPhysReg.
3830void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3831 if (IsTopNode) {
3832 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3833 Top.bumpNode(SU);
3834 if (SU->hasPhysRegUses)
3835 reschedulePhysReg(SU, true);
3836 } else {
3837 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3838 Bot.bumpNode(SU);
3839 if (SU->hasPhysRegDefs)
3840 reschedulePhysReg(SU, false);
3841 }
3842}
3843
3844/// Create the standard converging machine scheduler. This will be used as the
3845/// default scheduler if the target does not set a default.
3846ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3847 ScheduleDAGMILive *DAG =
3848 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3849 // Register DAG post-processors.
3850 //
3851 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3852 // data and pass it to later mutations. Have a single mutation that gathers
3853 // the interesting nodes in one pass.
3854 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3855
3856 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3857 // Add MacroFusion mutation if fusions are not empty.
3858 const auto &MacroFusions = STI.getMacroFusions();
3859 if (!MacroFusions.empty())
3860 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3861 return DAG;
3862}
3863
3864static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3865 return createGenericSchedLive(C);
3866}
3867
3869GenericSchedRegistry("converge", "Standard converging scheduler.",
3871
3872//===----------------------------------------------------------------------===//
3873// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3874//===----------------------------------------------------------------------===//
3875
3876void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3877 DAG = Dag;
3878 SchedModel = DAG->getSchedModel();
3879 TRI = DAG->TRI;
3880
3881 Rem.init(DAG, SchedModel);
3882 Top.init(DAG, SchedModel, &Rem);
3883 Bot.init(DAG, SchedModel, &Rem);
3884
3885 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3886 // or are disabled, then these HazardRecs will be disabled.
3887 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3888 if (!Top.HazardRec) {
3889 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3890 }
3891 if (!Bot.HazardRec) {
3892 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3893 }
3894}
3895
3896void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3897 MachineBasicBlock::iterator End,
3898 unsigned NumRegionInstrs) {
3899 const MachineFunction &MF = *Begin->getMF();
3900
3901 // Default to top-down because it was implemented first and existing targets
3902 // expect that behavior by default.
3903 RegionPolicy.OnlyTopDown = true;
3904 RegionPolicy.OnlyBottomUp = false;
3905
3906 // Allow the subtarget to override default policy.
3907 MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, NumRegionInstrs);
3908
3909 // After subtarget overrides, apply command line options.
3910 if (PostRADirection == MISched::TopDown) {
3911 RegionPolicy.OnlyTopDown = true;
3912 RegionPolicy.OnlyBottomUp = false;
3913 } else if (PostRADirection == MISched::BottomUp) {
3914 RegionPolicy.OnlyTopDown = false;
3915 RegionPolicy.OnlyBottomUp = true;
3916 } else if (PostRADirection == MISched::Bidirectional) {
3917 RegionPolicy.OnlyBottomUp = false;
3918 RegionPolicy.OnlyTopDown = false;
3919 }
3920}
3921
3922void PostGenericScheduler::registerRoots() {
3923 Rem.CriticalPath = DAG->ExitSU.getDepth();
3924
3925 // Some roots may not feed into ExitSU. Check all of them in case.
3926 for (const SUnit *SU : Bot.Available) {
3927 if (SU->getDepth() > Rem.CriticalPath)
3928 Rem.CriticalPath = SU->getDepth();
3929 }
3930 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3931 if (DumpCriticalPathLength) {
3932 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3933 }
3934}
3935
3936/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3937///
3938/// \param Cand provides the policy and current best candidate.
3939/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3940/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3941bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3942 SchedCandidate &TryCand) {
3943 // Initialize the candidate if needed.
3944 if (!Cand.isValid()) {
3945 TryCand.Reason = NodeOrder;
3946 return true;
3947 }
3948
3949 // Prioritize instructions that read unbuffered resources by stall cycles.
3950 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3951 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3952 return TryCand.Reason != NoCand;
3953
3954 // Keep clustered nodes together.
3955 const SUnit *CandNextClusterSU =
3956 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3957 const SUnit *TryCandNextClusterSU =
3958 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3959 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3960 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
3961 return TryCand.Reason != NoCand;
3962
3963 // Avoid critical resource consumption and balance the schedule.
3964 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3965 TryCand, Cand, ResourceReduce))
3966 return TryCand.Reason != NoCand;
3967 if (tryGreater(TryCand.ResDelta.DemandedResources,
3968 Cand.ResDelta.DemandedResources,
3969 TryCand, Cand, ResourceDemand))
3970 return TryCand.Reason != NoCand;
3971
3972 // We only compare a subset of features when comparing nodes between
3973 // Top and Bottom boundary.
3974 if (Cand.AtTop == TryCand.AtTop) {
3975 // Avoid serializing long latency dependence chains.
3976 if (Cand.Policy.ReduceLatency &&
3977 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
3978 return TryCand.Reason != NoCand;
3979 }
3980
3981 // Fall through to original instruction order.
3982 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3983 TryCand.Reason = NodeOrder;
3984 return true;
3985 }
3986
3987 return false;
3988}
3989
3990void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3991 SchedCandidate &Cand) {
3992 ReadyQueue &Q = Zone.Available;
3993 for (SUnit *SU : Q) {
3994 SchedCandidate TryCand(Cand.Policy);
3995 TryCand.SU = SU;
3996 TryCand.AtTop = Zone.isTop();
3997 TryCand.initResourceDelta(DAG, SchedModel);
3998 if (tryCandidate(Cand, TryCand)) {
3999 Cand.setBest(TryCand);
4000 LLVM_DEBUG(traceCandidate(Cand));
4001 }
4002 }
4003}
4004
4005/// Pick the best candidate node from either the top or bottom queue.
4006SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
4007 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4008 // out common parts.
4009
4010 // Schedule as far as possible in the direction of no choice. This is most
4011 // efficient, but also provides the best heuristics for CriticalPSets.
4012 if (SUnit *SU = Bot.pickOnlyChoice()) {
4013 IsTopNode = false;
4014 tracePick(Only1, false);
4015 return SU;
4016 }
4017 if (SUnit *SU = Top.pickOnlyChoice()) {
4018 IsTopNode = true;
4019 tracePick(Only1, true);
4020 return SU;
4021 }
4022 // Set the bottom-up policy based on the state of the current bottom zone and
4023 // the instructions outside the zone, including the top zone.
4024 CandPolicy BotPolicy;
4025 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4026 // Set the top-down policy based on the state of the current top zone and
4027 // the instructions outside the zone, including the bottom zone.
4028 CandPolicy TopPolicy;
4029 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4030
4031 // See if BotCand is still valid (because we previously scheduled from Top).
4032 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4033 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4034 BotCand.Policy != BotPolicy) {
4035 BotCand.reset(CandPolicy());
4036 pickNodeFromQueue(Bot, BotCand);
4037 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4038 } else {
4039 LLVM_DEBUG(traceCandidate(BotCand));
4040#ifndef NDEBUG
4041 if (VerifyScheduling) {
4042 SchedCandidate TCand;
4043 TCand.reset(CandPolicy());
4044 pickNodeFromQueue(Bot, BotCand);
4045 assert(TCand.SU == BotCand.SU &&
4046 "Last pick result should correspond to re-picking right now");
4047 }
4048#endif
4049 }
4050
4051 // Check if the top Q has a better candidate.
4052 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4053 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4054 TopCand.Policy != TopPolicy) {
4055 TopCand.reset(CandPolicy());
4056 pickNodeFromQueue(Top, TopCand);
4057 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4058 } else {
4059 LLVM_DEBUG(traceCandidate(TopCand));
4060#ifndef NDEBUG
4061 if (VerifyScheduling) {
4062 SchedCandidate TCand;
4063 TCand.reset(CandPolicy());
4064 pickNodeFromQueue(Top, TopCand);
4065 assert(TCand.SU == TopCand.SU &&
4066 "Last pick result should correspond to re-picking right now");
4067 }
4068#endif
4069 }
4070
4071 // Pick best from BotCand and TopCand.
4072 assert(BotCand.isValid());
4073 assert(TopCand.isValid());
4074 SchedCandidate Cand = BotCand;
4075 TopCand.Reason = NoCand;
4076 if (tryCandidate(Cand, TopCand)) {
4077 Cand.setBest(TopCand);
4078 LLVM_DEBUG(traceCandidate(Cand));
4079 }
4080
4081 IsTopNode = Cand.AtTop;
4082 tracePick(Cand);
4083 return Cand.SU;
4084}
4085
4086/// Pick the next node to schedule.
4087SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4088 if (DAG->top() == DAG->bottom()) {
4089 assert(Top.Available.empty() && Top.Pending.empty() &&
4090 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4091 return nullptr;
4092 }
4093 SUnit *SU;
4094 do {
4095 if (RegionPolicy.OnlyBottomUp) {
4096 SU = Bot.pickOnlyChoice();
4097 if (SU) {
4098 tracePick(Only1, true);
4099 } else {
4100 CandPolicy NoPolicy;
4101 BotCand.reset(NoPolicy);
4102 // Set the bottom-up policy based on the state of the current bottom
4103 // zone and the instructions outside the zone, including the top zone.
4104 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4105 pickNodeFromQueue(Bot, BotCand);
4106 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4107 tracePick(BotCand);
4108 SU = BotCand.SU;
4109 }
4110 IsTopNode = false;
4111 } else if (RegionPolicy.OnlyTopDown) {
4112 SU = Top.pickOnlyChoice();
4113 if (SU) {
4114 tracePick(Only1, true);
4115 } else {
4116 CandPolicy NoPolicy;
4117 TopCand.reset(NoPolicy);
4118 // Set the top-down policy based on the state of the current top zone
4119 // and the instructions outside the zone, including the bottom zone.
4120 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4121 pickNodeFromQueue(Top, TopCand);
4122 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4123 tracePick(TopCand);
4124 SU = TopCand.SU;
4125 }
4126 IsTopNode = true;
4127 } else {
4128 SU = pickNodeBidirectional(IsTopNode);
4129 }
4130 } while (SU->isScheduled);
4131
4132 if (SU->isTopReady())
4133 Top.removeReady(SU);
4134 if (SU->isBottomReady())
4135 Bot.removeReady(SU);
4136
4137 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4138 << *SU->getInstr());
4139 return SU;
4140}
4141
4142/// Called after ScheduleDAGMI has scheduled an instruction and updated
4143/// scheduled/remaining flags in the DAG nodes.
4144void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4145 if (IsTopNode) {
4146 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4147 Top.bumpNode(SU);
4148 } else {
4149 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4150 Bot.bumpNode(SU);
4151 }
4152}
4153
4154ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4155 ScheduleDAGMI *DAG =
4156 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4157 /*RemoveKillFlags=*/true);
4158 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4159 // Add MacroFusion mutation if fusions are not empty.
4160 const auto &MacroFusions = STI.getMacroFusions();
4161 if (!MacroFusions.empty())
4162 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4163 return DAG;
4164}
4165
4166//===----------------------------------------------------------------------===//
4167// ILP Scheduler. Currently for experimental analysis of heuristics.
4168//===----------------------------------------------------------------------===//
4169
4170namespace {
4171
4172/// Order nodes by the ILP metric.
4173struct ILPOrder {
4174 const SchedDFSResult *DFSResult = nullptr;
4175 const BitVector *ScheduledTrees = nullptr;
4176 bool MaximizeILP;
4177
4178 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4179
4180 /// Apply a less-than relation on node priority.
4181 ///
4182 /// (Return true if A comes after B in the Q.)
4183 bool operator()(const SUnit *A, const SUnit *B) const {
4184 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4185 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4186 if (SchedTreeA != SchedTreeB) {
4187 // Unscheduled trees have lower priority.
4188 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4189 return ScheduledTrees->test(SchedTreeB);
4190
4191 // Trees with shallower connections have lower priority.
4192 if (DFSResult->getSubtreeLevel(SchedTreeA)
4193 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4194 return DFSResult->getSubtreeLevel(SchedTreeA)
4195 < DFSResult->getSubtreeLevel(SchedTreeB);
4196 }
4197 }
4198 if (MaximizeILP)
4199 return DFSResult->getILP(A) < DFSResult->getILP(B);
4200 else
4201 return DFSResult->getILP(A) > DFSResult->getILP(B);
4202 }
4203};
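ILPOrder is a strict weak ordering ("A comes after B"), so when it is used with the standard heap algorithms below, pop_heap always hands back the highest-priority node. A tiny standalone illustration with plain ints standing in for SUnits ranked by an ILP-like score (not part of this file):

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> ReadyQ{3, 9, 1, 4};
  // "Less than" comparator: A comes after B in the queue if its score is lower.
  auto Cmp = [](int A, int B) { return A < B; };
  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  std::cout << ReadyQ.back() << '\n'; // prints 9, the highest-priority entry
  ReadyQ.pop_back();
}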
4204
4205/// Schedule based on the ILP metric.
4206class ILPScheduler : public MachineSchedStrategy {
4207 ScheduleDAGMILive *DAG = nullptr;
4208 ILPOrder Cmp;
4209
4210 std::vector<SUnit*> ReadyQ;
4211
4212public:
4213 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4214
4215 void initialize(ScheduleDAGMI *dag) override {
4216 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4217 DAG = static_cast<ScheduleDAGMILive*>(dag);
4218 DAG->computeDFSResult();
4219 Cmp.DFSResult = DAG->getDFSResult();
4220 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4221 ReadyQ.clear();
4222 }
4223
4224 void registerRoots() override {
4225 // Restore the heap in ReadyQ with the updated DFS results.
4226 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4227 }
4228
4229 /// Implement MachineSchedStrategy interface.
4230 /// -----------------------------------------
4231
4232 /// Callback to select the highest priority node from the ready Q.
4233 SUnit *pickNode(bool &IsTopNode) override {
4234 if (ReadyQ.empty()) return nullptr;
4235 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4236 SUnit *SU = ReadyQ.back();
4237 ReadyQ.pop_back();
4238 IsTopNode = false;
4239 LLVM_DEBUG(dbgs() << "Pick node "
4240 << "SU(" << SU->NodeNum << ") "
4241 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4242 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4243 << " @"
4244 << DAG->getDFSResult()->getSubtreeLevel(
4245 DAG->getDFSResult()->getSubtreeID(SU))
4246 << '\n'
4247 << "Scheduling " << *SU->getInstr());
4248 return SU;
4249 }
4250
4251 /// Scheduler callback to notify that a new subtree is scheduled.
4252 void scheduleTree(unsigned SubtreeID) override {
4253 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4254 }
4255
4256 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4257 /// DFSResults, and resort the priority Q.
4258 void schedNode(SUnit *SU, bool IsTopNode) override {
4259 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4260 }
4261
4262 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4263
4264 void releaseBottomNode(SUnit *SU) override {
4265 ReadyQ.push_back(SU);
4266 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4267 }
4268};
4269
4270} // end anonymous namespace
4271
4272static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4273 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4274}
4275static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4276 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4277}
4278
4280 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4282 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4283
4284//===----------------------------------------------------------------------===//
4285// Machine Instruction Shuffler for Correctness Testing
4286//===----------------------------------------------------------------------===//
4287
4288#ifndef NDEBUG
4289namespace {
4290
4291/// Apply a less-than relation on the node order, which corresponds to the
4292/// instruction order prior to scheduling. IsReverse implements greater-than.
4293template<bool IsReverse>
4294struct SUnitOrder {
4295 bool operator()(SUnit *A, SUnit *B) const {
4296 if (IsReverse)
4297 return A->NodeNum > B->NodeNum;
4298 else
4299 return A->NodeNum < B->NodeNum;
4300 }
4301};
4302
4303/// Reorder instructions as much as possible.
4304class InstructionShuffler : public MachineSchedStrategy {
4305 bool IsAlternating;
4306 bool IsTopDown;
4307
4308 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4309 // gives nodes with a higher number higher priority causing the latest
4310 // instructions to be scheduled first.
4311 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4312 TopQ;
4313
4314 // When scheduling bottom-up, use greater-than as the queue priority.
4315 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4316 BottomQ;
4317
4318public:
4319 InstructionShuffler(bool alternate, bool topdown)
4320 : IsAlternating(alternate), IsTopDown(topdown) {}
4321
4322 void initialize(ScheduleDAGMI*) override {
4323 TopQ.clear();
4324 BottomQ.clear();
4325 }
4326
4327 /// Implement MachineSchedStrategy interface.
4328 /// -----------------------------------------
4329
4330 SUnit *pickNode(bool &IsTopNode) override {
4331 SUnit *SU;
4332 if (IsTopDown) {
4333 do {
4334 if (TopQ.empty()) return nullptr;
4335 SU = TopQ.top();
4336 TopQ.pop();
4337 } while (SU->isScheduled);
4338 IsTopNode = true;
4339 } else {
4340 do {
4341 if (BottomQ.empty()) return nullptr;
4342 SU = BottomQ.top();
4343 BottomQ.pop();
4344 } while (SU->isScheduled);
4345 IsTopNode = false;
4346 }
4347 if (IsAlternating)
4348 IsTopDown = !IsTopDown;
4349 return SU;
4350 }
4351
4352 void schedNode(SUnit *SU, bool IsTopNode) override {}
4353
4354 void releaseTopNode(SUnit *SU) override {
4355 TopQ.push(SU);
4356 }
4357 void releaseBottomNode(SUnit *SU) override {
4358 BottomQ.push(SU);
4359 }
4360};
4361
4362} // end anonymous namespace
4363
4364static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4365 bool Alternate =
4366 PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp;
4367 bool TopDown = PreRADirection != MISched::BottomUp;
4368 return new ScheduleDAGMILive(
4369 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4370}
4371
4373 "shuffle", "Shuffle machine instructions alternating directions",
4375#endif // !NDEBUG
4376
4377//===----------------------------------------------------------------------===//
4378// GraphWriter support for ScheduleDAGMILive.
4379//===----------------------------------------------------------------------===//
4380
4381#ifndef NDEBUG
4382namespace llvm {
4383
4384template<> struct GraphTraits<
4385 ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
4386
4387template<>
4388struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
4389 DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4390
4391 static std::string getGraphName(const ScheduleDAG *G) {
4392 return std::string(G->MF.getName());
4393 }
4394
4395 static bool renderGraphFromBottomUp() {
4396 return true;
4397 }
4398
4399 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4400 if (ViewMISchedCutoff == 0)
4401 return false;
4402 return (Node->Preds.size() > ViewMISchedCutoff
4403 || Node->Succs.size() > ViewMISchedCutoff);
4404 }
4405
4406 /// If you want to override the dot attributes printed for a particular
4407 /// edge, override this method.
4408 static std::string getEdgeAttributes(const SUnit *Node,
4409 SUnitIterator EI,
4410 const ScheduleDAG *Graph) {
4411 if (EI.isArtificialDep())
4412 return "color=cyan,style=dashed";
4413 if (EI.isCtrlDep())
4414 return "color=blue,style=dashed";
4415 return "";
4416 }
4417
4418 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4419 std::string Str;
4420 raw_string_ostream SS(Str);
4421 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4422 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4423 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4424 SS << "SU:" << SU->NodeNum;
4425 if (DFS)
4426 SS << " I:" << DFS->getNumInstrs(SU);
4427 return Str;
4428 }
4429
4430 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4431 return G->getGraphNodeLabel(SU);
4432 }
4433
4434 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4435 std::string Str("shape=Mrecord");
4436 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4437 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4438 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4439 if (DFS) {
4440 Str += ",style=filled,fillcolor=\"#";
4441 Str += DOT::getColorString(DFS->getSubtreeID(N));
4442 Str += '"';
4443 }
4444 return Str;
4445 }
4446};
4447
4448} // end namespace llvm
4449#endif // NDEBUG
4450
4451/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4452/// rendered using 'dot'.
4453void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4454#ifndef NDEBUG
4455 ViewGraph(this, Name, false, Title);
4456#else
4457 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4458 << "systems with Graphviz or gv!\n";
4459#endif // NDEBUG
4460}
4461
4462/// Out-of-line implementation with no arguments is handy for gdb.
4464 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4465}
4466
4467/// Sort predicate for the intervals stored in an instance of
4468/// ResourceSegments. Intervals are always disjoint (no intersection
4469/// for any pairs of intervals), therefore we can sort the totality of
4470/// the intervals by looking only at the left boundary.
4471static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4472 const ResourceSegments::IntervalTy &B) {
4473 return A.first < B.first;
4474}
4475
4476unsigned ResourceSegments::getFirstAvailableAt(
4477 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4478 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4479 IntervalBuilder) const {
4480 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4481 sortIntervals) &&
4482 "Cannot execute on an un-sorted set of intervals.");
4483
4484 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4485 // a ResourceSegment interval for that situation.
4486 if (AcquireAtCycle == ReleaseAtCycle)
4487 return CurrCycle;
4488
4489 unsigned RetCycle = CurrCycle;
4490 ResourceSegments::IntervalTy NewInterval =
4491 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4492 for (auto &Interval : _Intervals) {
4493 if (!intersects(NewInterval, Interval))
4494 continue;
4495
4496 // Move the interval right next to the top of the one it
4497 // intersects.
4498 assert(Interval.second > NewInterval.first &&
4499 "Invalid intervals configuration.");
4500 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4501 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4502 }
4503 return RetCycle;
4504}
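As a rough worked example of the walk above (not part of this file): with booked, sorted, half-open intervals [2,4) and [4,7), a 2-cycle request starting at cycle 3 collides with both and is pushed to cycle 7. The sketch uses plain std::pair intervals and the usual half-open overlap test, which is slightly stricter than the intersects() helper defined later in this file.

#include <iostream>
#include <utility>
#include <vector>

using Interval = std::pair<unsigned, unsigned>; // [first, second)

static bool overlaps(Interval A, Interval B) {
  return A.first < B.second && B.first < A.second;
}

// Slide the requested interval past every booked interval it collides with.
static unsigned firstAvailableAt(unsigned CurrCycle, unsigned Len,
                                 const std::vector<Interval> &Booked) {
  unsigned C = CurrCycle;
  for (Interval I : Booked) { // Booked is sorted by left boundary
    Interval New{C, C + Len};
    if (overlaps(New, I))
      C = I.second; // resume right after the conflicting interval
  }
  return C;
}

int main() {
  std::vector<Interval> Booked{{2, 4}, {4, 7}};
  std::cout << firstAvailableAt(3, 2, Booked) << '\n'; // prints 7
}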
4505
4506void ResourceSegments::add(ResourceSegments::IntervalTy A,
4507 const unsigned CutOff) {
4508 assert(A.first <= A.second && "Cannot add negative resource usage");
4509 assert(CutOff > 0 && "0-size interval history has no use.");
4510 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4511 // instruction needed the resource to be available but does not use it.
4512 // However, ResourceSegment represents an interval that is closed on the left
4513 // and open on the right. It is impossible to represent an empty interval when
4514 // the left is closed. Do not add it to Intervals.
4515 if (A.first == A.second)
4516 return;
4517
4518 assert(all_of(_Intervals,
4519 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4520 return !intersects(A, Interval);
4521 }) &&
4522 "A resource is being overwritten");
4523 _Intervals.push_back(A);
4524
4525 sortAndMerge();
4526
4527 // Do not keep the full history of the intervals, just the
4528 // latest #CutOff.
4529 while (_Intervals.size() > CutOff)
4530 _Intervals.pop_front();
4531}
4532
4533bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4534 ResourceSegments::IntervalTy B) {
4535 assert(A.first <= A.second && "Invalid interval");
4536 assert(B.first <= B.second && "Invalid interval");
4537
4538 // Share one boundary.
4539 if ((A.first == B.first) || (A.second == B.second))
4540 return true;
4541
4542 // full intersect: [ *** ) B
4543 // [***) A
4544 if ((A.first > B.first) && (A.second < B.second))
4545 return true;
4546
4547 // right intersect: [ ***) B
4548 // [*** ) A
4549 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4550 return true;
4551
4552 // left intersect: [*** ) B
4553 // [ ***) A
4554 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4555 return true;
4556
4557 return false;
4558}
4559
4560void ResourceSegments::sortAndMerge() {
4561 if (_Intervals.size() <= 1)
4562 return;
4563
4564 // First sort the collection.
4565 _Intervals.sort(sortIntervals);
4566
4567 // We can use std::next because the list has at least 2 elements.
4568 auto next = std::next(std::begin(_Intervals));
4569 auto E = std::end(_Intervals);
4570 for (; next != E; ++next) {
4571 if (std::prev(next)->second >= next->first) {
4572 next->first = std::prev(next)->first;
4573 _Intervals.erase(std::prev(next));
4574 continue;
4575 }
4576 }
4577}
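As a rough illustration of the sort-then-absorb walk above (not part of this file), the sketch below merges the touching and partially overlapping intervals {[5,7), [1,3), [2,5)} into a single [1,7) using the same std::list pattern:

#include <cstdint>
#include <iostream>
#include <iterator>
#include <list>
#include <utility>

using Interval = std::pair<int64_t, int64_t>; // [first, second)

static void sortAndMergeToy(std::list<Interval> &Ivals) {
  if (Ivals.size() <= 1)
    return;
  Ivals.sort([](Interval A, Interval B) { return A.first < B.first; });
  for (auto It = std::next(Ivals.begin()); It != Ivals.end(); ++It) {
    if (std::prev(It)->second >= It->first) {
      It->first = std::prev(It)->first; // absorb the previous interval
      Ivals.erase(std::prev(It));
    }
  }
}

int main() {
  std::list<Interval> Ivals{{5, 7}, {1, 3}, {2, 5}};
  sortAndMergeToy(Ivals);
  for (Interval I : Ivals)
    std::cout << '[' << I.first << ',' << I.second << ") ";
  std::cout << '\n'; // prints [1,7)
}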
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool hasValue() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
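A minimal sketch of the bottom-up pattern these RegPressureTracker methods support: initialize at the region end, recede() across each instruction, then read the resulting maximum pressure. The helper name is illustrative, and LiveIntervals and RegisterClassInfo are assumed to be available already.
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;

static void surveyPressure(const MachineFunction *MF,
                           const RegisterClassInfo *RCI,
                           const LiveIntervals *LIS,
                           const MachineBasicBlock *MBB,
                           MachineBasicBlock::const_iterator Begin,
                           MachineBasicBlock::const_iterator End) {
  IntervalPressure Pressure;          // storage for the tracked result
  RegPressureTracker RPT(Pressure);
  // Start tracking at the bottom of the region; lane masks and untied defs
  // are not tracked in this sketch.
  RPT.init(MF, RCI, LIS, MBB, End, /*TrackLaneMasks=*/false,
           /*TrackUntiedDefs=*/false);
  while (RPT.getPos() != Begin)
    RPT.recede();                     // step to the previous instruction and
                                      // update liveness and pressure
  RPT.closeRegion();                  // finalize boundaries, live-ins/outs
  const RegisterPressure &RP = RPT.getPressure();
  (void)RP.MaxSetPressure;            // max pressure per pressure set ID
}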
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These functions return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
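A small illustration (the cycle values are my own) of the half-open interval convention used by ResourceSegments: [a, b) includes a but not b, so two intervals that merely touch at an endpoint do not intersect.
#include "llvm/CodeGen/MachineScheduler.h"
#include <cassert>
using namespace llvm;

static void intervalExamples() {
  ResourceSegments::IntervalTy A{2, 5};   // covers cycles 2, 3, 4
  ResourceSegments::IntervalTy B{4, 7};   // covers cycles 4, 5, 6
  ResourceSegments::IntervalTy C{5, 7};   // covers cycles 5, 6
  assert(ResourceSegments::intersects(A, B));   // both cover cycle 4
  assert(!ResourceSegments::intersects(A, C));  // only touch at cycle 5
}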
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
bool isArtificialDep() const
Definition: ScheduleDAG.h:686
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:683
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:278
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:275
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:300
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:424
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:416
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296
unsigned NumPredsLeft
Definition: ScheduleDAG.h:274
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:301
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:276
bool isBottomReady() const
Definition: ScheduleDAG.h:467
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:291
bool isTopReady() const
Definition: ScheduleDAG.h:464
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:277
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
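A minimal sketch (the helper name is mine) of how heuristics commonly walk an SUnit's predecessor edges: weak edges such as cluster edges are skipped, and the predecessor's depth plus the edge latency approximates the earliest cycle at which the operand along that edge becomes available.
#include "llvm/CodeGen/ScheduleDAG.h"
#include <algorithm>
using namespace llvm;

static unsigned criticalPredDepth(const SUnit *SU) {
  unsigned Max = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isWeak())      // weak (e.g. cluster) edges don't constrain order
      continue;
    Max = std::max(Max, Pred.getSUnit()->getDepth() + Pred.getLatency());
  }
  return Max;
}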
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group?
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
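A minimal sketch (not the GenericScheduler code itself, and with a deliberately simple "lowest NodeNum" tie-break) of how a strategy typically drives a SchedBoundary: let pickOnlyChoice() release pending nodes and handle the trivial cases, otherwise scan the available queue while skipping nodes that checkHazard() rejects.
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

static SUnit *pickFromZone(SchedBoundary &Zone) {
  if (SUnit *SU = Zone.pickOnlyChoice())
    return SU;
  SUnit *Best = nullptr;
  for (SUnit *SU : Zone.Available.elements()) {
    if (Zone.checkHazard(SU))       // would stall in the current cycle
      continue;
    if (!Best || SU->NodeNum < Best->NodeNum)
      Best = SU;
  }
  return Best;
}

// Once the strategy commits to the picked node, the zone is advanced with
//   Zone.removeReady(SU);
//   Zone.bumpNode(SU);
// (typically from the strategy's schedNode() callback).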
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
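A minimal sketch of consuming SchedDFSResult from a strategy, assuming the DAG is a ScheduleDAGMILive on which computeDFSResult() has been requested (e.g. from initialize()). The comparison mirrors the idea behind ILP-based ordering but is only illustrative.
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDFS.h"
using namespace llvm;

static bool moreILP(const ScheduleDAGMILive *DAG, const SUnit *A,
                    const SUnit *B) {
  const SchedDFSResult *DFS = DAG->getDFSResult();
  if (!DFS)
    return A->NodeNum < B->NodeNum;          // fall back to source order
  // Prefer the node whose subtree exposes more instruction-level parallelism.
  ILPValue ILPA = DFS->getILP(A), ILPB = DFS->getILP(B);
  if (ILPA < ILPB || ILPB < ILPA)
    return ILPB < ILPA;
  // Otherwise prefer the node in the more deeply connected subtree.
  return DFS->getSubtreeLevel(DFS->getSubtreeID(A)) >
         DFS->getSubtreeLevel(DFS->getSubtreeID(B));
}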
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
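A minimal sketch (the mutation class and its heuristic are invented for illustration) of the usual pattern inside a ScheduleDAGMutation: walk the DAG's SUnits, check that a new edge would not create a cycle with canAddEdge(), then insert an artificial SDep with addEdge().
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
using namespace llvm;

namespace {
struct OrderCallsMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    SUnit *PrevCall = nullptr;
    for (SUnit &SU : DAG->SUnits) {
      if (!SU.isCall)
        continue;
      // Chain calls together with artificial edges, oldest first, as long as
      // doing so cannot introduce a cycle in the DAG.
      if (PrevCall && DAG->canAddEdge(&SU, PrevCall))
        DAG->addEdge(&SU, SDep(PrevCall, SDep::Artificial));
      PrevCall = &SU;
    }
  }
};
} // end anonymous namespace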
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
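A minimal sketch (a free function standing in for a target's createMachineScheduler hook) of how a ScheduleDAGMILive is typically assembled and extended with postprocessing mutations via addMutation():
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;

static ScheduleDAGInstrs *buildSchedDAG(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
  // Cluster adjacent loads and keep copies close to their uses.
  DAG->addMutation(createLoadClusterDAGMutation(STI.getInstrInfo(),
                                                STI.getRegisterInfo()));
  DAG->addMutation(createCopyConstrainDAGMutation(STI.getInstrInfo(),
                                                  STI.getRegisterInfo()));
  return DAG;
}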
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:580
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
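A minimal sketch of the hazard-recognizer callback protocol (the "one call per cycle" rule is invented purely for illustration): the scheduler queries getHazardType() before issuing, calls EmitInstruction() when it issues, and Advance/RecedeCycle when it stalls instead.
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
using namespace llvm;

namespace {
struct OneCallPerCycleHazardRec : ScheduleHazardRecognizer {
  bool SawCallThisCycle = false;

  HazardType getHazardType(SUnit *SU, int /*Stalls*/) override {
    // Report a hazard if a second call would be issued in the same cycle.
    return (SawCallThisCycle && SU->isCall) ? Hazard : NoHazard;
  }
  void EmitInstruction(SUnit *SU) override {
    if (SU->isCall)
      SawCallThisCycle = true;
  }
  void AdvanceCycle() override { SawCallThisCycle = false; }
  void RecedeCycle() override { SawCallThisCycle = false; }
  void Reset() override { SawCallThisCycle = false; }
};
} // end anonymous namespace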
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:254
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if the current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if a new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
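A minimal sketch (the helper name is mine) of the common TargetSchedModel query loop: given a resolved scheduling class, iterate its consumed processor resources and scale the counts by getResourceFactor() so different resource kinds are comparable.
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCSchedule.h"
#include <algorithm>
using namespace llvm;

static unsigned maxScaledResourceUse(const TargetSchedModel &SM,
                                     const MCSchedClassDesc *SC) {
  unsigned Max = 0;
  if (!SM.hasInstrSchedModel() || !SC || !SC->isValid())
    return Max;
  for (TargetSchedModel::ProcResIter PI = SM.getWriteProcResBegin(SC),
                                     PE = SM.getWriteProcResEnd(SC);
       PI != PE; ++PI) {
    // Cycles the resource is held, normalized across resource kinds.
    unsigned Scaled = (PI->ReleaseAtCycle - PI->AcquireAtCycle) *
                      SM.getResourceFactor(PI->ProcResourceIdx);
    Max = std::max(Max, Scaled);
  }
  return Max;
}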
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
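A minimal sketch of the per-region policy hook as a subtarget might implement it. "MyTargetSubtarget" is hypothetical (a real target derives, via TableGen, from its <Target>GenSubtargetInfo), and the region-size threshold is illustrative.
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;

// Hypothetical subtarget used only to show the override point.
class MyTargetSubtarget : public TargetSubtargetInfo {
  using TargetSubtargetInfo::TargetSubtargetInfo; // inherit the base ctor

public:
  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override {
    // Track register pressure in every region; for very small regions the
    // latency heuristic rarely pays off, so disable it.
    Policy.ShouldTrackPressure = true;
    if (NumRegionInstrs < 8)
      Policy.DisableLatencyHeuristic = true;
  }
};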
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows using arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:91
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153
cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
void initializePostMachineSchedulerPass(PassRegistry &)
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
cl::opt< bool > ViewMISchedDAGs
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineSchedulerPass(PassRegistry &)
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146
cl::opt< MISched::Direction > PreRADirection
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then clean up.
Definition: GraphWriter.h:427
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
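A minimal sketch (not the in-tree tryCandidate) of how a derived GenericScheduler combines the exported helpers: each of tryLess/tryGreater returns true once it has decided the order and records the reason on the winning candidate. The particular sequence of heuristics here is illustrative.
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

namespace {
class MySchedStrategy : public GenericScheduler {
public:
  using GenericScheduler::GenericScheduler;

  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override {
    // An empty candidate is always beaten.
    if (!Cand.isValid()) {
      TryCand.Reason = NodeOrder;
      return true;
    }
    // Keep physreg copies close to their defs and uses.
    if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                   biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
      return TryCand.Reason != NoCand;
    // Prefer the node with fewer unscheduled weak (e.g. cluster) edges left.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
    // Fall back to the original instruction order.
    if (tryLess(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand,
                NodeOrder))
      return true;
    return false;
  }
};
} // end anonymous namespace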
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:56
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:66
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.