1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
51#include "llvm/Config/llvm-config.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
57#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
81 cl::desc("Force top-down list scheduling"));
83 cl::desc("Force bottom-up list scheduling"));
84namespace MISchedPostRASched {
89};
90} // end namespace MISchedPostRASched
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
95 // expect that behavior by default.
99 "Force top-down post reg-alloc list scheduling"),
101 "Force bottom-up post reg-alloc list scheduling"),
103 "Force bidirectional post reg-alloc list scheduling")));
106 cl::desc("Print critical path length to stdout"));
107
109 "verify-misched", cl::Hidden,
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
114 "view-misched-dags", cl::Hidden,
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
116cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117 cl::desc("Print schedule DAGs"));
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
120 cl::desc("Dump resource usage at schedule boundary."));
122 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
123 cl::desc("Show details of invoking getNextResoufceCycle."));
124#else
125const bool ViewMISchedDAGs = false;
126const bool PrintDAGs = false;
127const bool MischedDetailResourceBooking = false;
128#ifdef LLVM_ENABLE_DUMP
129const bool MISchedDumpReservedCycles = false;
130#endif // LLVM_ENABLE_DUMP
131#endif // NDEBUG
132
133} // end namespace llvm
134
135#ifndef NDEBUG
136/// In some situations a few uninteresting nodes depend on nearly all other
137/// nodes in the graph; provide a cutoff to hide them.
138static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
139 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
140
142 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
143
145 cl::desc("Only schedule this function"));
146static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147 cl::desc("Only schedule this MBB#"));
148#endif // NDEBUG
149
150/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151/// size of the ready lists.
153 cl::desc("Limit ready list to N instructions"), cl::init(256));
154
155static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156 cl::desc("Enable register pressure scheduling."), cl::init(true));
157
158static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
160
162 cl::desc("Enable memop clustering."),
163 cl::init(true));
164static cl::opt<bool>
165 ForceFastCluster("force-fast-cluster", cl::Hidden,
166 cl::desc("Switch to fast cluster algorithm with the lost "
167 "of some fusion opportunities"),
168 cl::init(false));
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171 cl::desc("The threshold for fast cluster"),
172 cl::init(1000));
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
177 cl::desc("Dump resource usage at schedule boundary."));
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
182 cl::init(19));
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185 cl::desc("Set width of the columns showing resource booking."),
186 cl::init(5));
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194 cl::desc("Number of intervals to track"), cl::init(10));
195
196// DAG subtrees must have at least this many nodes.
197static const unsigned MinSubtreeSize = 8;
198
199// Pin the vtables to this file.
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204//===----------------------------------------------------------------------===//
205// Machine Instruction Scheduling Pass and Registry
206//===----------------------------------------------------------------------===//
207
210}
211
213 delete RegClassInfo;
214}
215
216namespace {
217
218/// Base class for a machine scheduler that can run at any point.
219class MachineSchedulerBase : public MachineSchedContext,
220 public MachineFunctionPass {
221public:
222 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
223
224 void print(raw_ostream &O, const Module* = nullptr) const override;
225
226protected:
227 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228};
229
230/// MachineScheduler runs after coalescing and before register allocation.
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237 bool runOnMachineFunction(MachineFunction&) override;
238
239 static char ID; // Class identification, replacement for typeinfo
240
241protected:
242 ScheduleDAGInstrs *createMachineScheduler();
243};
244
245/// PostMachineScheduler runs shortly before code emission.
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252 bool runOnMachineFunction(MachineFunction&) override;
253
254 static char ID; // Class identification, replacement for typeinfo
255
256protected:
257 ScheduleDAGInstrs *createPostMachineScheduler();
258};
259
260} // end anonymous namespace
261
262char MachineScheduler::ID = 0;
263
264char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
267 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281 AU.setPreservesCFG();
291}
292
293char PostMachineScheduler::ID = 0;
294
295char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298 "PostRA Machine Instruction Scheduler", false, false)
302INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310 AU.setPreservesCFG();
316}
317
320
321/// A dummy default scheduler factory indicates whether the scheduler
322/// is overridden on the command line.
324 return nullptr;
325}
326
327/// MachineSchedOpt allows command line selection of the scheduler.
332 cl::desc("Machine instruction scheduler to use"));
333
335DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
337
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
341 cl::Hidden);
342
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
346 cl::init(true), cl::Hidden);
347
348/// Decrement this iterator until reaching the top or a non-debug instr.
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
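// --- Illustrative sketch (not from the LLVM sources) ------------------------
// The same skip-backwards idiom on a plain container, assuming a hypothetical
// ToyMI record with an IsDebug flag in place of MachineInstr.
#include <cassert>
#include <vector>

struct ToyMI { bool IsDebug; };

static std::vector<ToyMI>::const_iterator
priorNonDebugToy(std::vector<ToyMI>::const_iterator I,
                 std::vector<ToyMI>::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg)
    if (!I->IsDebug) // Stop at the first non-debug entry.
      break;
  return I;
}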
359
360/// Non-const version.
366}
367
368/// If this iterator is a debug value, increment until reaching the End or a
369/// non-debug instruction.
373 for(; I != End; ++I) {
374 if (!I->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380/// Non-const version.
386}
387
388/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390 // Select the scheduler, or set the default.
392 if (Ctor != useDefaultMachineSched)
393 return Ctor(this);
394
395 // Get the default scheduler set by the target for this function.
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
397 if (Scheduler)
398 return Scheduler;
399
400 // Default to GenericScheduler.
401 return createGenericSchedLive(this);
402}
403
404/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405/// the caller. We don't have a command line option to override the postRA
406/// scheduler. The Target must configure it.
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408 // Get the postRA scheduler set by the target for this function.
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
410 if (Scheduler)
411 return Scheduler;
412
413 // Default to GenericScheduler.
414 return createGenericSchedPostRA(this);
415}
416
417/// Top-level MachineScheduler pass driver.
418///
419/// Visit blocks in function order. Divide each block into scheduling regions
420/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421/// consistent with the DAG builder, which traverses the interior of the
422/// scheduling regions bottom-up.
423///
424/// This design avoids exposing scheduling boundaries to the DAG builder,
425/// simplifying the DAG builder's support for "special" target instructions.
426/// At the same time the design allows target schedulers to operate across
427/// scheduling boundaries, for example to bundle the boundary instructions
428/// without reordering them. This creates complexity, because the target
429/// scheduler must update the RegionBegin and RegionEnd positions cached by
430/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431/// design would be to split blocks at scheduling boundaries, but LLVM has a
432/// general bias against block splitting purely for implementation simplicity.
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434 if (skipFunction(mf.getFunction()))
435 return false;
436
437 if (EnableMachineSched.getNumOccurrences()) {
439 return false;
440 } else if (!mf.getSubtarget().enableMachineScheduler())
441 return false;
442
443 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445 // Initialize the context of the pass.
446 MF = &mf;
447 MLI = &getAnalysis<MachineLoopInfo>();
448 MDT = &getAnalysis<MachineDominatorTree>();
449 PassConfig = &getAnalysis<TargetPassConfig>();
450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452 LIS = &getAnalysis<LiveIntervals>();
453
454 if (VerifyScheduling) {
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(this, "Before machine scheduling.");
457 }
458 RegClassInfo->runOnMachineFunction(*MF);
459
460 // Instantiate the selected scheduler for this target, function, and
461 // optimization level.
462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
464 if (ForceTopDown)
466 else if (ForceBottomUp)
468 else
470 Scheduler->setDumpDirection(D);
471 scheduleRegions(*Scheduler, false);
472
473 LLVM_DEBUG(LIS->dump());
475 MF->verify(this, "After machine scheduling.");
476 return true;
477}
478
479bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
480 if (skipFunction(mf.getFunction()))
481 return false;
482
483 if (EnablePostRAMachineSched.getNumOccurrences()) {
485 return false;
486 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
488 return false;
489 }
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
491
492 // Initialize the context of the pass.
493 MF = &mf;
494 MLI = &getAnalysis<MachineLoopInfo>();
495 PassConfig = &getAnalysis<TargetPassConfig>();
496 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
497
499 MF->verify(this, "Before post machine scheduling.");
500
501 // Instantiate the selected scheduler for this target, function, and
502 // optimization level.
503 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
509 else
511 Scheduler->setDumpDirection(D);
512 scheduleRegions(*Scheduler, true);
513
515 MF->verify(this, "After post machine scheduling.");
516 return true;
517}
518
519/// Return true if the given instruction should not be included in a scheduling
520/// region.
521///
522/// MachineScheduler does not currently support scheduling across calls. To
523/// handle calls, the DAG builder needs to be modified to create register
524/// anti/output dependencies on the registers clobbered by the call's regmask
525/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
526/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
527/// the boundary, but there would be no benefit to postRA scheduling across
528/// calls this late anyway.
531 MachineFunction *MF,
532 const TargetInstrInfo *TII) {
533 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
534}
535
536/// A region of an MBB for scheduling.
537namespace {
538struct SchedRegion {
539 /// RegionBegin is the first instruction in the scheduling region, and
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
541 /// last instruction in the scheduling region. These iterators cannot refer
542 /// to instructions outside of the identified scheduling region because
543 /// those may be reordered before scheduling this region.
544 MachineBasicBlock::iterator RegionBegin;
546 unsigned NumRegionInstrs;
547
549 unsigned N) :
550 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
551};
552} // end anonymous namespace
553
555
556static void
558 MBBRegionsVector &Regions,
559 bool RegionsTopDown) {
562
564 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565 RegionEnd != MBB->begin(); RegionEnd = I) {
566
567 // Avoid decrementing RegionEnd for blocks with no terminator.
568 if (RegionEnd != MBB->end() ||
569 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
570 --RegionEnd;
571 }
572
573 // The next region starts above the previous region. Look backward in the
574 // instruction stream until we find the nearest boundary.
575 unsigned NumRegionInstrs = 0;
576 I = RegionEnd;
577 for (;I != MBB->begin(); --I) {
578 MachineInstr &MI = *std::prev(I);
579 if (isSchedBoundary(&MI, &*MBB, MF, TII))
580 break;
581 if (!MI.isDebugOrPseudoInstr()) {
582 // MBB::size() uses instr_iterator to count. Here we need a bundle to
583 // count as a single instruction.
584 ++NumRegionInstrs;
585 }
586 }
587
588 // It's possible we found a scheduling region that only has debug
589 // instructions. Don't bother scheduling these.
590 if (NumRegionInstrs != 0)
591 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
592 }
593
594 if (RegionsTopDown)
595 std::reverse(Regions.begin(), Regions.end());
596}
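// --- Illustrative sketch (not from the LLVM sources) ------------------------
// A minimal, self-contained rendering of the bottom-up region splitting that
// getSchedRegions() performs, over a toy "block" of opcode names. The
// ToyRegion type and the IsBoundary predicate are assumptions for the example
// only; the real code walks MachineBasicBlock iterators and skips debug
// instructions when counting region size.
#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <vector>

using ToyRegion = std::pair<std::size_t, std::size_t>; // Half-open [Begin, End).

static std::vector<ToyRegion>
splitIntoRegions(const std::vector<std::string> &Block,
                 const std::function<bool(const std::string &)> &IsBoundary) {
  std::vector<ToyRegion> Regions;
  std::size_t End = Block.size();
  while (End != 0) {
    // The boundary instruction itself stays outside the region.
    if (IsBoundary(Block[End - 1]))
      --End;
    std::size_t Begin = End;
    while (Begin != 0 && !IsBoundary(Block[Begin - 1]))
      --Begin;
    if (End > Begin) // Skip regions that ended up empty.
      Regions.emplace_back(Begin, End);
    End = Begin;
  }
  return Regions; // Bottom-up order, matching the default above.
}
// E.g. splitIntoRegions({"a", "call", "b", "c"}, isCall) yields {[2,4), [0,1)}.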
597
598/// Main driver for both MachineScheduler and PostMachineScheduler.
599void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
600 bool FixKillFlags) {
601 // Visit all machine basic blocks.
602 //
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up
604 // loop tree. Then we can optionally compute global RegPressure.
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
606 MBB != MBBEnd; ++MBB) {
607
608 Scheduler.startBlock(&*MBB);
609
610#ifndef NDEBUG
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
612 continue;
613 if (SchedOnlyBlock.getNumOccurrences()
614 && (int)SchedOnlyBlock != MBB->getNumber())
615 continue;
616#endif
617
618 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
619 // points to the scheduling boundary at the bottom of the region. The DAG
620 // does not include RegionEnd, but the region does (i.e. the next
621 // RegionEnd is above the previous RegionBegin). If the current block has
622 // no terminator then RegionEnd == MBB->end() for the bottom region.
623 //
624 // All the regions of MBB are first found and stored in MBBRegions, which
625 // are then processed top-down if the scheduler requests it.
626 //
627 // The Scheduler may insert instructions during either schedule() or
628 // exitRegion(), even for empty regions. So the local iterators 'I' and
629 // 'RegionEnd' are invalid across these calls. Instructions must not be
630 // added to other regions than the current one without updating MBBRegions.
631
632 MBBRegionsVector MBBRegions;
633 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
634 for (const SchedRegion &R : MBBRegions) {
635 MachineBasicBlock::iterator I = R.RegionBegin;
636 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
637 unsigned NumRegionInstrs = R.NumRegionInstrs;
638
639 // Notify the scheduler of the region, even if we may skip scheduling
640 // it. Perhaps it still needs to be bundled.
641 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
642
643 // Skip empty scheduling regions (0 or 1 schedulable instructions).
644 if (I == RegionEnd || I == std::prev(RegionEnd)) {
645 // Close the current region. Bundle the terminator if needed.
646 // This invalidates 'RegionEnd' and 'I'.
647 Scheduler.exitRegion();
648 continue;
649 }
650 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652 << " " << MBB->getName() << "\n From: " << *I
653 << " To: ";
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
655 else dbgs() << "End\n";
656 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
658 errs() << MF->getName();
659 errs() << ":%bb. " << MBB->getNumber();
660 errs() << " " << MBB->getName() << " \n";
661 }
662
663 // Schedule a region: possibly reorder instructions.
664 // This invalidates the original region iterators.
665 Scheduler.schedule();
666
667 // Close the current region.
668 Scheduler.exitRegion();
669 }
670 Scheduler.finishBlock();
671 // FIXME: Ideally, no further passes should rely on kill flags. However,
672 // thumb2 size reduction is currently an exception, so the PostMIScheduler
673 // needs to do this.
674 if (FixKillFlags)
675 Scheduler.fixupKills(*MBB);
676 }
677 Scheduler.finalizeSchedule();
678}
679
680void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
681 // unimplemented
682}
683
684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686 dbgs() << "Queue " << Name << ": ";
687 for (const SUnit *SU : Queue)
688 dbgs() << SU->NodeNum << " ";
689 dbgs() << "\n";
690}
691#endif
692
693//===----------------------------------------------------------------------===//
694// ScheduleDAGMI - Basic machine instruction scheduling. This is
695// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
696// virtual registers.
697//===----------------------------------------------------------------------===//
698
699// Provide a vtable anchor.
701
702/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
703/// NumPredsLeft reaches zero, release the successor node.
704///
705/// FIXME: Adjust SuccSU height based on MinLatency.
707 SUnit *SuccSU = SuccEdge->getSUnit();
708
709 if (SuccEdge->isWeak()) {
710 --SuccSU->WeakPredsLeft;
711 if (SuccEdge->isCluster())
712 NextClusterSucc = SuccSU;
713 return;
714 }
715#ifndef NDEBUG
716 if (SuccSU->NumPredsLeft == 0) {
717 dbgs() << "*** Scheduling failed! ***\n";
718 dumpNode(*SuccSU);
719 dbgs() << " has been released too many times!\n";
720 llvm_unreachable(nullptr);
721 }
722#endif
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
724 // CurrCycle may have advanced since then.
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
727
728 --SuccSU->NumPredsLeft;
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
730 SchedImpl->releaseTopNode(SuccSU);
731}
732
733/// releaseSuccessors - Call releaseSucc on each of SU's successors.
735 for (SDep &Succ : SU->Succs)
736 releaseSucc(SU, &Succ);
737}
738
739/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
740/// NumSuccsLeft reaches zero, release the predecessor node.
741///
742/// FIXME: Adjust PredSU height based on MinLatency.
744 SUnit *PredSU = PredEdge->getSUnit();
745
746 if (PredEdge->isWeak()) {
747 --PredSU->WeakSuccsLeft;
748 if (PredEdge->isCluster())
749 NextClusterPred = PredSU;
750 return;
751 }
752#ifndef NDEBUG
753 if (PredSU->NumSuccsLeft == 0) {
754 dbgs() << "*** Scheduling failed! ***\n";
755 dumpNode(*PredSU);
756 dbgs() << " has been released too many times!\n";
757 llvm_unreachable(nullptr);
758 }
759#endif
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
761 // CurrCycle may have advanced since then.
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
764
765 --PredSU->NumSuccsLeft;
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
767 SchedImpl->releaseBottomNode(PredSU);
768}
769
770/// releasePredecessors - Call releasePred on each of SU's predecessors.
772 for (SDep &Pred : SU->Preds)
773 releasePred(SU, &Pred);
774}
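// --- Illustrative sketch (not from the LLVM sources) ------------------------
// The essence of releaseSucc()/releasePred(): every node counts its
// unscheduled predecessors, and scheduling a node decrements its successors'
// counters, moving nodes that hit zero into the ready queue. ToyNode is an
// assumption standing in for SUnit.
#include <cassert>
#include <cstddef>
#include <queue>
#include <vector>

struct ToyNode {
  unsigned NumPredsLeft = 0;      // Unscheduled predecessors remaining.
  std::vector<std::size_t> Succs; // Indices of successor nodes.
};

static void releaseSuccessorsOf(std::vector<ToyNode> &Nodes,
                                std::size_t Scheduled,
                                std::queue<std::size_t> &ReadyQueue) {
  for (std::size_t SuccIdx : Nodes[Scheduled].Succs) {
    ToyNode &Succ = Nodes[SuccIdx];
    assert(Succ.NumPredsLeft > 0 && "node released too many times");
    if (--Succ.NumPredsLeft == 0)
      ReadyQueue.push(SuccIdx); // The successor is now ready to schedule.
  }
}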
775
778 SchedImpl->enterMBB(bb);
779}
780
782 SchedImpl->leaveMBB();
784}
785
786/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
787/// after crossing a scheduling boundary. [begin, end) includes all instructions
788/// in the region, including the boundary itself and single-instruction regions
789/// that don't get scheduled.
793 unsigned regioninstrs)
794{
795 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
796
797 SchedImpl->initPolicy(begin, end, regioninstrs);
798}
799
800/// This is normally called from the main scheduler loop but may also be invoked
801/// by the scheduling strategy to perform additional code motion.
804 // Advance RegionBegin if the first instruction moves down.
805 if (&*RegionBegin == MI)
806 ++RegionBegin;
807
808 // Update the instruction stream.
809 BB->splice(InsertPos, BB, MI);
810
811 // Update LiveIntervals
812 if (LIS)
813 LIS->handleMove(*MI, /*UpdateFlags=*/true);
814
815 // Recede RegionBegin if an instruction moves above the first.
816 if (RegionBegin == InsertPos)
817 RegionBegin = MI;
818}
819
821#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
822 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
824 return false;
825 }
826 ++NumInstrsScheduled;
827#endif
828 return true;
829}
830
831/// Per-region scheduling driver, called back from
832/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
833/// that does not consider liveness or register pressure. It is useful for
834/// PostRA scheduling and potentially other custom schedulers.
836 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
837 LLVM_DEBUG(SchedImpl->dumpPolicy());
838
839 // Build the DAG.
841
843
844 SmallVector<SUnit*, 8> TopRoots, BotRoots;
845 findRootsAndBiasEdges(TopRoots, BotRoots);
846
847 LLVM_DEBUG(dump());
848 if (PrintDAGs) dump();
850
851 // Initialize the strategy before modifying the DAG.
852 // This may initialize a DFSResult to be used for queue priority.
853 SchedImpl->initialize(this);
854
855 // Initialize ready queues now that the DAG and priority data are finalized.
856 initQueues(TopRoots, BotRoots);
857
858 bool IsTopNode = false;
859 while (true) {
860 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
861 SUnit *SU = SchedImpl->pickNode(IsTopNode);
862 if (!SU) break;
863
864 assert(!SU->isScheduled && "Node already scheduled");
865 if (!checkSchedLimit())
866 break;
867
868 MachineInstr *MI = SU->getInstr();
869 if (IsTopNode) {
870 assert(SU->isTopReady() && "node still has unscheduled dependencies");
871 if (&*CurrentTop == MI)
873 else
875 } else {
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
879 if (&*priorII == MI)
880 CurrentBottom = priorII;
881 else {
882 if (&*CurrentTop == MI)
883 CurrentTop = nextIfDebug(++CurrentTop, priorII);
886 }
887 }
888 // Notify the scheduling strategy before updating the DAG.
889 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
890 // runs, it can then use the accurate ReadyCycle time to determine whether
891 // newly released nodes can move to the readyQ.
892 SchedImpl->schedNode(SU, IsTopNode);
893
894 updateQueues(SU, IsTopNode);
895 }
896 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
897
899
900 LLVM_DEBUG({
901 dbgs() << "*** Final schedule for "
902 << printMBBReference(*begin()->getParent()) << " ***\n";
903 dumpSchedule();
904 dbgs() << '\n';
905 });
906}
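// --- Illustrative sketch (not from the LLVM sources) ------------------------
// The shape of the driver loop above, reduced to a plain topological list
// scheduler: release the roots, repeatedly pick a ready node (a real
// MachineSchedStrategy picks by priority; FIFO is used here only to keep the
// sketch short), and release its successors. MiniSU is an assumption standing
// in for SUnit.
#include <cstddef>
#include <queue>
#include <vector>

struct MiniSU {
  unsigned NumPredsLeft = 0;
  std::vector<std::size_t> Succs;
};

static std::vector<std::size_t> listScheduleTopDown(std::vector<MiniSU> DAG) {
  std::queue<std::size_t> Ready;
  for (std::size_t I = 0; I < DAG.size(); ++I)
    if (DAG[I].NumPredsLeft == 0)
      Ready.push(I); // The "TopRoots".

  std::vector<std::size_t> Order;
  while (!Ready.empty()) {
    std::size_t SU = Ready.front(); // pickNode() equivalent.
    Ready.pop();
    Order.push_back(SU);
    for (std::size_t Succ : DAG[SU].Succs) // updateQueues() equivalent.
      if (--DAG[Succ].NumPredsLeft == 0)
        Ready.push(Succ);
  }
  return Order; // A valid topological order of the toy DAG.
}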
907
908/// Apply each ScheduleDAGMutation step in order.
910 for (auto &m : Mutations)
911 m->apply(this);
912}
913
916 SmallVectorImpl<SUnit*> &BotRoots) {
917 for (SUnit &SU : SUnits) {
918 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
919
920 // Order predecessors so DFSResult follows the critical path.
921 SU.biasCriticalPath();
922
923 // A SUnit is ready to top schedule if it has no predecessors.
924 if (!SU.NumPredsLeft)
925 TopRoots.push_back(&SU);
926 // A SUnit is ready to bottom schedule if it has no successors.
927 if (!SU.NumSuccsLeft)
928 BotRoots.push_back(&SU);
929 }
931}
932
933/// Identify DAG roots and setup scheduler queues.
935 ArrayRef<SUnit*> BotRoots) {
936 NextClusterSucc = nullptr;
937 NextClusterPred = nullptr;
938
939 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
940 //
941 // Nodes with unreleased weak edges can still be roots.
942 // Release top roots in forward order.
943 for (SUnit *SU : TopRoots)
944 SchedImpl->releaseTopNode(SU);
945
946 // Release bottom roots in reverse order so the higher priority nodes appear
947 // first. This is more natural and slightly more efficient.
949 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
950 SchedImpl->releaseBottomNode(*I);
951 }
952
955
956 SchedImpl->registerRoots();
957
958 // Advance past initial DebugValues.
961}
962
963/// Update scheduler queues after scheduling an instruction.
964void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
965 // Release dependent instructions for scheduling.
966 if (IsTopNode)
968 else
970
971 SU->isScheduled = true;
972}
973
974/// Reinsert any remaining debug_values, just like the PostRA scheduler.
976 // If first instruction was a DBG_VALUE then put it back.
977 if (FirstDbgValue) {
980 }
981
982 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
984 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
985 MachineInstr *DbgValue = P.first;
986 MachineBasicBlock::iterator OrigPrevMI = P.second;
987 if (&*RegionBegin == DbgValue)
988 ++RegionBegin;
989 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
992 }
993}
994
995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996static const char *scheduleTableLegend = " i: issue\n x: resource booked";
997
999 // Bail out when there is no schedule model to query.
1001 return;
1002
1003 // Nothing to show if there are no instructions or just one.
1004 if (BB->size() < 2)
1005 return;
1006
1007 dbgs() << " * Schedule table (TopDown):\n";
1008 dbgs() << scheduleTableLegend << "\n";
1009 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1010 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1011 for (MachineInstr &MI : *this) {
1012 SUnit *SU = getSUnit(&MI);
1013 if (!SU)
1014 continue;
1015 const MCSchedClassDesc *SC = getSchedClass(SU);
1018 PI != PE; ++PI) {
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1021 }
1022 }
1023 // Print the header with the cycles
1024 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1025 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1026 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1027 dbgs() << "|\n";
1028
1029 for (MachineInstr &MI : *this) {
1030 SUnit *SU = getSUnit(&MI);
1031 if (!SU) {
1032 dbgs() << "Missing SUnit\n";
1033 continue;
1034 }
1035 std::string NodeName("SU(");
1036 NodeName += std::to_string(SU->NodeNum) + ")";
1037 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1038 unsigned C = FirstCycle;
1039 for (; C <= LastCycle; ++C) {
1040 if (C == SU->TopReadyCycle)
1041 dbgs() << llvm::left_justify("| i", ColWidth);
1042 else
1043 dbgs() << llvm::left_justify("|", ColWidth);
1044 }
1045 dbgs() << "|\n";
1046 const MCSchedClassDesc *SC = getSchedClass(SU);
1047
1051
1053 llvm::stable_sort(ResourcesIt,
1054 [](const MCWriteProcResEntry &LHS,
1055 const MCWriteProcResEntry &RHS) -> bool {
1056 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1057 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1058 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1059 });
1060 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1061 C = FirstCycle;
1062 const std::string ResName =
1063 SchedModel.getResourceName(PI.ProcResourceIdx);
1064 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1066 dbgs() << llvm::left_justify("|", ColWidth);
1067 }
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1069 ++I, ++C)
1070 dbgs() << llvm::left_justify("| x", ColWidth);
1071 while (C++ <= LastCycle)
1072 dbgs() << llvm::left_justify("|", ColWidth);
1073 // Place end char
1074 dbgs() << "| \n";
1075 }
1076 }
1077}
1078
1080 // Bail out when there is no schedule model to query.
1082 return;
1083
1084 // Nothing to show if there are no instructions or just one.
1085 if (BB->size() < 2)
1086 return;
1087
1088 dbgs() << " * Schedule table (BottomUp):\n";
1089 dbgs() << scheduleTableLegend << "\n";
1090
1091 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1092 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1093 for (MachineInstr &MI : *this) {
1094 SUnit *SU = getSUnit(&MI);
1095 if (!SU)
1096 continue;
1097 const MCSchedClassDesc *SC = getSchedClass(SU);
1100 PI != PE; ++PI) {
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1103 }
1104 }
1105 // Print the header with the cycles
1106 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1107 for (int C = FirstCycle; C >= LastCycle; --C)
1108 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1109 dbgs() << "|\n";
1110
1111 for (MachineInstr &MI : *this) {
1112 SUnit *SU = getSUnit(&MI);
1113 if (!SU) {
1114 dbgs() << "Missing SUnit\n";
1115 continue;
1116 }
1117 std::string NodeName("SU(");
1118 NodeName += std::to_string(SU->NodeNum) + ")";
1119 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1120 int C = FirstCycle;
1121 for (; C >= LastCycle; --C) {
1122 if (C == (int)SU->BotReadyCycle)
1123 dbgs() << llvm::left_justify("| i", ColWidth);
1124 else
1125 dbgs() << llvm::left_justify("|", ColWidth);
1126 }
1127 dbgs() << "|\n";
1128 const MCSchedClassDesc *SC = getSchedClass(SU);
1132
1134 llvm::stable_sort(ResourcesIt,
1135 [](const MCWriteProcResEntry &LHS,
1136 const MCWriteProcResEntry &RHS) -> bool {
1137 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1138 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1139 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1140 });
1141 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1142 C = FirstCycle;
1143 const std::string ResName =
1144 SchedModel.getResourceName(PI.ProcResourceIdx);
1145 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1147 dbgs() << llvm::left_justify("|", ColWidth);
1148 }
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150 ++I, --C)
1151 dbgs() << llvm::left_justify("| x", ColWidth);
1152 while (C-- >= LastCycle)
1153 dbgs() << llvm::left_justify("|", ColWidth);
1154 // Place end char
1155 dbgs() << "| \n";
1156 }
1157 }
1158}
1159#endif
1160
1161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1166 else if (DumpDir == DumpDirection::BottomUp)
1169 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1170 } else {
1171 dbgs() << "* Schedule table: DumpDirection not set.\n";
1172 }
1173 }
1174
1175 for (MachineInstr &MI : *this) {
1176 if (SUnit *SU = getSUnit(&MI))
1177 dumpNode(*SU);
1178 else
1179 dbgs() << "Missing SUnit\n";
1180 }
1181}
1182#endif
1183
1184//===----------------------------------------------------------------------===//
1185// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1186// preservation.
1187//===----------------------------------------------------------------------===//
1188
1190 delete DFSResult;
1191}
1192
1194 const MachineInstr &MI = *SU.getInstr();
1195 for (const MachineOperand &MO : MI.operands()) {
1196 if (!MO.isReg())
1197 continue;
1198 if (!MO.readsReg())
1199 continue;
1200 if (TrackLaneMasks && !MO.isUse())
1201 continue;
1202
1203 Register Reg = MO.getReg();
1204 if (!Reg.isVirtual())
1205 continue;
1206
1207 // Ignore re-defs.
1208 if (TrackLaneMasks) {
1209 bool FoundDef = false;
1210 for (const MachineOperand &MO2 : MI.all_defs()) {
1211 if (MO2.getReg() == Reg && !MO2.isDead()) {
1212 FoundDef = true;
1213 break;
1214 }
1215 }
1216 if (FoundDef)
1217 continue;
1218 }
1219
1220 // Record this local VReg use.
1222 for (; UI != VRegUses.end(); ++UI) {
1223 if (UI->SU == &SU)
1224 break;
1225 }
1226 if (UI == VRegUses.end())
1228 }
1229}
1230
1231/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1232/// crossing a scheduling boundary. [begin, end) includes all instructions in
1233/// the region, including the boundary itself and single-instruction regions
1234/// that don't get scheduled.
1238 unsigned regioninstrs)
1239{
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1241 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1242
1243 // For convenience remember the end of the liveness region.
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1245
1247
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1250
1252 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1253}
1254
1255// Setup the register pressure trackers for the top scheduled and bottom
1256// scheduled regions.
1258 VRegUses.clear();
1260 for (SUnit &SU : SUnits)
1261 collectVRegUses(SU);
1262
1264 ShouldTrackLaneMasks, false);
1266 ShouldTrackLaneMasks, false);
1267
1268 // Close the RPTracker to finalize live ins.
1270
1272
1273 // Initialize the live ins and live outs.
1276
1277 // Close one end of the tracker so we can call
1278 // getMaxUpward/DownwardPressureDelta before advancing across any
1279 // instructions. This converts currently live regs into live ins/outs.
1282
1284 if (!BotRPTracker.getLiveThru().empty()) {
1286 LLVM_DEBUG(dbgs() << "Live Thru: ";
1288 };
1289
1290 // For each live out vreg reduce the pressure change associated with other
1291 // uses of the same vreg below the live-out reaching def.
1293
1294 // Account for liveness generated by the region boundary.
1295 if (LiveRegionEnd != RegionEnd) {
1297 BotRPTracker.recede(&LiveUses);
1298 updatePressureDiffs(LiveUses);
1299 }
1300
1301 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1303 dbgs() << "Bottom Pressure:\n";
1305
1307 (RegionEnd->isDebugInstr() &&
1309 "Can't find the region bottom");
1310
1311 // Cache the list of excess pressure sets in this region. This will also track
1312 // the max pressure in the scheduled code for these sets.
1313 RegionCriticalPSets.clear();
1314 const std::vector<unsigned> &RegionPressure =
1316 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1318 if (RegionPressure[i] > Limit) {
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1320 << " Actual " << RegionPressure[i] << "\n");
1321 RegionCriticalPSets.push_back(PressureChange(i));
1322 }
1323 }
1324 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1325 for (const PressureChange &RCPS
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1328 dbgs() << "\n");
1329}
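// --- Illustrative sketch (not from the LLVM sources) ------------------------
// A rough picture of what a bottom-up pressure tracker computes, with one
// pressure set and unit register weights (the real tracker handles multiple
// pressure sets, lane masks and physical registers). ToyInstr is an assumption
// for the example only.
#include <algorithm>
#include <set>
#include <vector>

struct ToyInstr {
  std::vector<int> Defs; // Virtual registers defined here.
  std::vector<int> Uses; // Virtual registers read here.
};

// Walk the region bottom-up, like BotRPTracker.recede(): above a def the
// register stops being live, above a use it becomes live. The peak live-set
// size is the register pressure of the region.
static unsigned maxPressureBottomUp(const std::vector<ToyInstr> &Region,
                                    std::set<int> Live /*live-outs*/) {
  unsigned MaxPressure = static_cast<unsigned>(Live.size());
  for (auto I = Region.rbegin(), E = Region.rend(); I != E; ++I) {
    for (int Def : I->Defs)
      Live.erase(Def);
    for (int Use : I->Uses)
      Live.insert(Use);
    MaxPressure = std::max(MaxPressure, static_cast<unsigned>(Live.size()));
  }
  return MaxPressure;
}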
1330
1333 const std::vector<unsigned> &NewMaxPressure) {
1334 const PressureDiff &PDiff = getPressureDiff(SU);
1335 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1336 for (const PressureChange &PC : PDiff) {
1337 if (!PC.isValid())
1338 break;
1339 unsigned ID = PC.getPSet();
1340 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1341 ++CritIdx;
1342 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1343 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1345 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1346 }
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1348 if (NewMaxPressure[ID] >= Limit - 2) {
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1350 << NewMaxPressure[ID]
1351 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1352 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1353 << " livethru)\n");
1354 }
1355 }
1356}
1357
1358/// Update the PressureDiff array for liveness after scheduling this
1359/// instruction.
1361 ArrayRef<RegisterMaskPair> LiveUses) {
1362 for (const RegisterMaskPair &P : LiveUses) {
1363 Register Reg = P.RegUnit;
1364 /// FIXME: Currently assuming single-use physregs.
1365 if (!Reg.isVirtual())
1366 continue;
1367
1369 // If the register has just become live then other uses won't change
1370 // this fact anymore => decrement pressure.
1371 // If the register has just become dead then other uses make it come
1372 // back to life => increment pressure.
1373 bool Decrement = P.LaneMask.any();
1374
1375 for (const VReg2SUnit &V2SU
1376 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1377 SUnit &SU = *V2SU.SU;
1378 if (SU.isScheduled || &SU == &ExitSU)
1379 continue;
1380
1381 PressureDiff &PDiff = getPressureDiff(&SU);
1382 PDiff.addPressureChange(Reg, Decrement, &MRI);
1383 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1384 << printReg(Reg, TRI) << ':'
1385 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1386 dbgs() << " to "; PDiff.dump(*TRI););
1387 }
1388 } else {
1389 assert(P.LaneMask.any());
1390 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1391 // This may be called before CurrentBottom has been initialized. However,
1392 // BotRPTracker must have a valid position. We want the value live into the
1393 // instruction or live out of the block, so ask for the previous
1394 // instruction's live-out.
1395 const LiveInterval &LI = LIS->getInterval(Reg);
1396 VNInfo *VNI;
1399 if (I == BB->end())
1400 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1401 else {
1403 VNI = LRQ.valueIn();
1404 }
1405 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1406 assert(VNI && "No live value at use.");
1407 for (const VReg2SUnit &V2SU
1408 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1409 SUnit *SU = V2SU.SU;
1410 // If this use comes before the reaching def, it cannot be a last use,
1411 // so decrease its pressure change.
1412 if (!SU->isScheduled && SU != &ExitSU) {
1413 LiveQueryResult LRQ =
1415 if (LRQ.valueIn() == VNI) {
1416 PressureDiff &PDiff = getPressureDiff(SU);
1417 PDiff.addPressureChange(Reg, true, &MRI);
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419 << *SU->getInstr();
1420 dbgs() << " to "; PDiff.dump(*TRI););
1421 }
1422 }
1423 }
1424 }
1425 }
1426}
1427
1429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 if (EntrySU.getInstr() != nullptr)
1432 for (const SUnit &SU : SUnits) {
1433 dumpNodeAll(SU);
1434 if (ShouldTrackPressure) {
1435 dbgs() << " Pressure Diff : ";
1436 getPressureDiff(&SU).dump(*TRI);
1437 }
1438 dbgs() << " Single Issue : ";
1439 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1440 SchedModel.mustEndGroup(SU.getInstr()))
1441 dbgs() << "true;";
1442 else
1443 dbgs() << "false;";
1444 dbgs() << '\n';
1445 }
1446 if (ExitSU.getInstr() != nullptr)
1448#endif
1449}
1450
1451/// schedule - Called back from MachineScheduler::runOnMachineFunction
1452/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1453/// only includes instructions that have DAG nodes, not scheduling boundaries.
1454///
1455/// This is a skeletal driver, with all the functionality pushed into helpers,
1456/// so that it can be easily extended by experimental schedulers. Generally,
1457/// implementing MachineSchedStrategy should be sufficient to implement a new
1458/// scheduling algorithm. However, if a scheduler further subclasses
1459/// ScheduleDAGMILive then it will want to override this virtual method in order
1460/// to update any specialized state.
1462 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1463 LLVM_DEBUG(SchedImpl->dumpPolicy());
1465
1467
1468 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1469 findRootsAndBiasEdges(TopRoots, BotRoots);
1470
1471 // Initialize the strategy before modifying the DAG.
1472 // This may initialize a DFSResult to be used for queue priority.
1473 SchedImpl->initialize(this);
1474
1475 LLVM_DEBUG(dump());
1476 if (PrintDAGs) dump();
1478
1479 // Initialize ready queues now that the DAG and priority data are finalized.
1480 initQueues(TopRoots, BotRoots);
1481
1482 bool IsTopNode = false;
1483 while (true) {
1484 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1486 if (!SU) break;
1487
1488 assert(!SU->isScheduled && "Node already scheduled");
1489 if (!checkSchedLimit())
1490 break;
1491
1492 scheduleMI(SU, IsTopNode);
1493
1494 if (DFSResult) {
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1496 if (!ScheduledTrees.test(SubtreeID)) {
1497 ScheduledTrees.set(SubtreeID);
1498 DFSResult->scheduleTree(SubtreeID);
1499 SchedImpl->scheduleTree(SubtreeID);
1500 }
1501 }
1502
1503 // Notify the scheduling strategy after updating the DAG.
1504 SchedImpl->schedNode(SU, IsTopNode);
1505
1506 updateQueues(SU, IsTopNode);
1507 }
1508 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1509
1511
1512 LLVM_DEBUG({
1513 dbgs() << "*** Final schedule for "
1514 << printMBBReference(*begin()->getParent()) << " ***\n";
1515 dumpSchedule();
1516 dbgs() << '\n';
1517 });
1518}
1519
1520/// Build the DAG and setup three register pressure trackers.
1522 if (!ShouldTrackPressure) {
1523 RPTracker.reset();
1524 RegionCriticalPSets.clear();
1526 return;
1527 }
1528
1529 // Initialize the register pressure tracker used by buildSchedGraph.
1531 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1532
1533 // Account for liveness generated by the region boundary.
1534 if (LiveRegionEnd != RegionEnd)
1535 RPTracker.recede();
1536
1537 // Build the DAG, and compute current register pressure.
1539
1540 // Initialize top/bottom trackers after computing region pressure.
1542}
1543
1545 if (!DFSResult)
1546 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1547 DFSResult->clear();
1549 DFSResult->resize(SUnits.size());
1552}
1553
1554/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1555/// only provides the critical path for single block loops. To handle loops that
1556/// span blocks, we could use the vreg path latencies provided by
1557/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1558/// available for use in the scheduler.
1559///
1560/// The cyclic path estimation identifies a def-use pair that crosses the back
1561/// edge and considers the depth and height of the nodes. For example, consider
1562/// the following instruction sequence where each instruction has unit latency
1563/// and defines an eponymous virtual register:
1564///
1565/// a->b(a,c)->c(b)->d(c)->exit
1566///
1567/// The cyclic critical path is two cycles: b->c->b
1568/// The acyclic critical path is four cycles: a->b->c->d->exit
1569/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572/// LiveInDepth = depth(b) = len(a->b) = 1
1573///
1574/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1576/// CyclicCriticalPath = min(2, 2) = 2
1577///
1578/// This could be relevant to PostRA scheduling, but is currently implemented
1579/// assuming LiveIntervals.
1581 // This only applies to single-block loops.
1582 if (!BB->isSuccessor(BB))
1583 return 0;
1584
1585 unsigned MaxCyclicLatency = 0;
1586 // Visit each live out vreg def to find def/use pairs that cross iterations.
1588 Register Reg = P.RegUnit;
1589 if (!Reg.isVirtual())
1590 continue;
1591 const LiveInterval &LI = LIS->getInterval(Reg);
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1593 if (!DefVNI)
1594 continue;
1595
1597 const SUnit *DefSU = getSUnit(DefMI);
1598 if (!DefSU)
1599 continue;
1600
1601 unsigned LiveOutHeight = DefSU->getHeight();
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1603 // Visit all local users of the vreg def.
1604 for (const VReg2SUnit &V2SU
1605 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1606 SUnit *SU = V2SU.SU;
1607 if (SU == &ExitSU)
1608 continue;
1609
1610 // Only consider uses of the phi.
1612 if (!LRQ.valueIn()->isPHIDef())
1613 continue;
1614
1615 // Assume that a path spanning two iterations is a cycle, which could
1616 // overestimate in strange cases. This allows cyclic latency to be
1617 // estimated as the minimum slack of the vreg's depth or height.
1618 unsigned CyclicLatency = 0;
1619 if (LiveOutDepth > SU->getDepth())
1620 CyclicLatency = LiveOutDepth - SU->getDepth();
1621
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1623 if (LiveInHeight > LiveOutHeight) {
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625 CyclicLatency = LiveInHeight - LiveOutHeight;
1626 } else
1627 CyclicLatency = 0;
1628
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1631 if (CyclicLatency > MaxCyclicLatency)
1632 MaxCyclicLatency = CyclicLatency;
1633 }
1634 }
1635 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1636 return MaxCyclicLatency;
1637}
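// --- Illustrative sketch (not from the LLVM sources) ------------------------
// The per-pair computation above boils down to taking the smaller of the depth
// slack and the height slack, clamped at zero. Plugging in the numbers from
// the a->b->c->d example in the comment gives the expected 2 cycles.
#include <algorithm>

static unsigned cyclicLatency(unsigned LiveOutDepth, unsigned LiveInDepth,
                              unsigned LiveOutHeight, unsigned LiveInHeight) {
  unsigned DepthSlack =
      LiveOutDepth > LiveInDepth ? LiveOutDepth - LiveInDepth : 0;
  unsigned HeightSlack =
      LiveInHeight > LiveOutHeight ? LiveInHeight - LiveOutHeight : 0;
  return std::min(DepthSlack, HeightSlack);
}
// cyclicLatency(/*LiveOutDepth=*/3, /*LiveInDepth=*/1,
//               /*LiveOutHeight=*/2, /*LiveInHeight=*/4) == 2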
1638
1639/// Release ExitSU predecessors and setup scheduler queues. Re-position
1640/// the Top RP tracker in case the region beginning has changed.
1642 ArrayRef<SUnit*> BotRoots) {
1643 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1644 if (ShouldTrackPressure) {
1645 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1647 }
1648}
1649
1650/// Move an instruction and update register pressure.
1651void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1652 // Move the instruction to its new location in the instruction stream.
1653 MachineInstr *MI = SU->getInstr();
1654
1655 if (IsTopNode) {
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1657 if (&*CurrentTop == MI)
1659 else {
1662 }
1663
1664 if (ShouldTrackPressure) {
1665 // Update top scheduled pressure.
1666 RegisterOperands RegOpers;
1667 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1669 // Adjust liveness and add missing dead+read-undef flags.
1670 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1671 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1672 } else {
1673 // Adjust for missing dead-def flags.
1674 RegOpers.detectDeadDefs(*MI, *LIS);
1675 }
1676
1677 TopRPTracker.advance(RegOpers);
1678 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1679 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1681
1683 }
1684 } else {
1685 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1688 if (&*priorII == MI)
1689 CurrentBottom = priorII;
1690 else {
1691 if (&*CurrentTop == MI) {
1692 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1694 }
1696 CurrentBottom = MI;
1698 }
1699 if (ShouldTrackPressure) {
1700 RegisterOperands RegOpers;
1701 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1703 // Adjust liveness and add missing dead+read-undef flags.
1704 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1705 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1706 } else {
1707 // Adjust for missing dead-def flags.
1708 RegOpers.detectDeadDefs(*MI, *LIS);
1709 }
1710
1714 BotRPTracker.recede(RegOpers, &LiveUses);
1715 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1716 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1718
1720 updatePressureDiffs(LiveUses);
1721 }
1722 }
1723}
1724
1725//===----------------------------------------------------------------------===//
1726// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1727//===----------------------------------------------------------------------===//
1728
1729namespace {
1730
1731/// Post-process the DAG to create cluster edges between neighboring
1732/// loads or between neighboring stores.
1733class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1734 struct MemOpInfo {
1735 SUnit *SU;
1737 int64_t Offset;
1738 LocationSize Width;
1739 bool OffsetIsScalable;
1740
1741 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1742 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1743 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1744 Width(Width), OffsetIsScalable(OffsetIsScalable) {}
1745
1746 static bool Compare(const MachineOperand *const &A,
1747 const MachineOperand *const &B) {
1748 if (A->getType() != B->getType())
1749 return A->getType() < B->getType();
1750 if (A->isReg())
1751 return A->getReg() < B->getReg();
1752 if (A->isFI()) {
1753 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1755 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1757 return StackGrowsDown ? A->getIndex() > B->getIndex()
1758 : A->getIndex() < B->getIndex();
1759 }
1760
1761 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1762 "index bases.");
1763 }
1764
1765 bool operator<(const MemOpInfo &RHS) const {
1766 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1767 // comparison instead when it's available.
1768 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1769 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1770 Compare))
1771 return true;
1772 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1773 BaseOps.begin(), BaseOps.end(), Compare))
1774 return false;
1775 if (Offset != RHS.Offset)
1776 return Offset < RHS.Offset;
1777 return SU->NodeNum < RHS.SU->NodeNum;
1778 }
1779 };
1780
1781 const TargetInstrInfo *TII;
1782 const TargetRegisterInfo *TRI;
1783 bool IsLoad;
1784 bool ReorderWhileClustering;
1785
1786public:
1787 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1788 const TargetRegisterInfo *tri, bool IsLoad,
1789 bool ReorderWhileClustering)
1790 : TII(tii), TRI(tri), IsLoad(IsLoad),
1791 ReorderWhileClustering(ReorderWhileClustering) {}
1792
1793 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1794
1795protected:
1796 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1797 ScheduleDAGInstrs *DAG);
1798 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1799 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1800 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1802};
1803
1804class StoreClusterMutation : public BaseMemOpClusterMutation {
1805public:
1806 StoreClusterMutation(const TargetInstrInfo *tii,
1807 const TargetRegisterInfo *tri,
1808 bool ReorderWhileClustering)
1809 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1810};
1811
1812class LoadClusterMutation : public BaseMemOpClusterMutation {
1813public:
1814 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1815 bool ReorderWhileClustering)
1816 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1817};
1818
1819} // end anonymous namespace
1820
1821namespace llvm {
1822
1823std::unique_ptr<ScheduleDAGMutation>
1825 const TargetRegisterInfo *TRI,
1826 bool ReorderWhileClustering) {
1827 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1828 TII, TRI, ReorderWhileClustering)
1829 : nullptr;
1830}
1831
1832std::unique_ptr<ScheduleDAGMutation>
1834 const TargetRegisterInfo *TRI,
1835 bool ReorderWhileClustering) {
1836 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1837 TII, TRI, ReorderWhileClustering)
1838 : nullptr;
1839}
1840
1841} // end namespace llvm
1842
1843// Sort all the loads/stores first. Then, for each load/store, check the
1844// following loads/stores one by one until reaching the first non-dependent one,
1845// and call the target hook to see if they can be clustered.
1846// If FastCluster is enabled, we assume that all the loads/stores have been
1847// preprocessed and no longer have dependencies on each other.
1848void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1849 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1850 ScheduleDAGInstrs *DAG) {
1851 // Keep track of the current cluster length and bytes for each SUnit.
1853
1854 // At this point, the `MemOpRecords` array must hold at least two mem ops. Try to
1855 // cluster mem ops collected within `MemOpRecords` array.
1856 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1857 // The decision to cluster mem ops is based on target-dependent logic.
1858 auto MemOpa = MemOpRecords[Idx];
1859
1860 // Look for the next load/store to cluster with.
1861 unsigned NextIdx = Idx + 1;
1862 for (; NextIdx < End; ++NextIdx)
1863 // Skip if MemOpb has been clustered already or has dependency with
1864 // MemOpa.
1865 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1866 (FastCluster ||
1867 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1868 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1869 break;
1870 if (NextIdx == End)
1871 continue;
1872
1873 auto MemOpb = MemOpRecords[NextIdx];
1874 unsigned ClusterLength = 2;
1875 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1876 MemOpb.Width.getValue().getKnownMinValue();
1877 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1878 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1879 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1880 MemOpb.Width.getValue().getKnownMinValue();
1881 }
1882
1883 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1884 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1885 MemOpb.Offset, MemOpb.OffsetIsScalable,
1886 ClusterLength, CurrentClusterBytes))
1887 continue;
1888
1889 SUnit *SUa = MemOpa.SU;
1890 SUnit *SUb = MemOpb.SU;
1891 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1892 std::swap(SUa, SUb);
1893
1894 // FIXME: Is this check really required?
1895 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1896 continue;
1897
1898 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1899 << SUb->NodeNum << ")\n");
1900 ++NumClustered;
1901
1902 if (IsLoad) {
1903 // Copy successor edges from SUa to SUb. Interleaving computation
1904 // dependent on SUa can prevent load combining due to register reuse.
1905 // Predecessor edges do not need to be copied from SUb to SUa since
1906 // nearby loads should have effectively the same inputs.
1907 for (const SDep &Succ : SUa->Succs) {
1908 if (Succ.getSUnit() == SUb)
1909 continue;
1910 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1911 << ")\n");
1912 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1913 }
1914 } else {
1915      // Copy predecessor edges from SUb to SUa to prevent the SUnits that SUb
1916      // depends on from being scheduled in between SUb and SUa. Successor edges
1917      // do not need to be copied from SUa to SUb since no one will depend
1918      // on stores.
1919      // Note that we don't need to worry about memory dependencies: mem ops
1920      // are never clustered if they have any memory dependency on each other.
1921 for (const SDep &Pred : SUb->Preds) {
1922 if (Pred.getSUnit() == SUa)
1923 continue;
1924 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1925 << ")\n");
1926 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1927 }
1928 }
1929
1930 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1931 CurrentClusterBytes};
1932
1933 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1934 << ", Curr cluster bytes: " << CurrentClusterBytes
1935 << "\n");
1936 }
1937}
1938
1939void BaseMemOpClusterMutation::collectMemOpRecords(
1940 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1941 for (auto &SU : SUnits) {
1942 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1943 (!IsLoad && !SU.getInstr()->mayStore()))
1944 continue;
1945
1946 const MachineInstr &MI = *SU.getInstr();
1947    SmallVector<const MachineOperand *, 4> BaseOps;
1948    int64_t Offset;
1949    bool OffsetIsScalable;
1950    LocationSize Width = 0;
1951    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1952                                           OffsetIsScalable, Width, TRI)) {
1953 MemOpRecords.push_back(
1954 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1955
1956 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1957 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1958 << ", Width: " << Width << "\n");
1959 }
1960#ifndef NDEBUG
1961 for (const auto *Op : BaseOps)
1962 assert(Op);
1963#endif
1964 }
1965}
1966
1967bool BaseMemOpClusterMutation::groupMemOps(
1968    ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1969    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1970  bool FastCluster =
1971      ForceFastCluster ||
1972      MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1973
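  // For example (hypothetical sizes): with 300 collected mem ops in a region
  // of 2000 SUnits, the heuristic value is 300 * 2000 / 1000 = 600; if that
  // exceeds FastClusterThreshold, the cheaper FastCluster grouping below is
  // used instead of the IsReachable() checks in clusterNeighboringMemOps().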
1974 for (const auto &MemOp : MemOps) {
1975 unsigned ChainPredID = DAG->SUnits.size();
1976 if (FastCluster) {
1977 for (const SDep &Pred : MemOp.SU->Preds) {
1978        // We only want to cluster mem ops that have the same ctrl (non-data)
1979        // pred, so that they have no ctrl dependency on each other. But for
1980        // store instrs, we can still cluster them if the pred is a load instr.
1981 if ((Pred.isCtrl() &&
1982 (IsLoad ||
1983 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1984 !Pred.isArtificial()) {
1985 ChainPredID = Pred.getSUnit()->NodeNum;
1986 break;
1987 }
1988 }
1989 } else
1990 ChainPredID = 0;
1991
1992 Groups[ChainPredID].push_back(MemOp);
1993 }
1994 return FastCluster;
1995}
1996
1997/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1998void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1999 // Collect all the clusterable loads/stores
2000 SmallVector<MemOpInfo, 32> MemOpRecords;
2001 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2002
2003 if (MemOpRecords.size() < 2)
2004 return;
2005
2006  // Put the loads/stores without dependencies into the same group with some
2007  // heuristics if the DAG is too complex, to avoid a compile-time blow up.
2008  // Note that some fusion pairs may be lost as a result.
2009  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2010  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2011
2012 for (auto &Group : Groups) {
2013    // Sort the loads/stores so that we can stop clustering as early as
2014    // possible.
2015 llvm::sort(Group.second);
2016
2017    // Try to cluster all the neighboring loads/stores.
2018 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2019 }
2020}
2021
2022//===----------------------------------------------------------------------===//
2023// CopyConstrain - DAG post-processing to encourage copy elimination.
2024//===----------------------------------------------------------------------===//
2025
2026namespace {
2027
2028/// Post-process the DAG to create weak edges from all uses of a copy to
2029/// the one use that defines the copy's source vreg, most likely an induction
2030/// variable increment.
2031class CopyConstrain : public ScheduleDAGMutation {
2032 // Transient state.
2033 SlotIndex RegionBeginIdx;
2034
2035 // RegionEndIdx is the slot index of the last non-debug instruction in the
2036 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2037 SlotIndex RegionEndIdx;
2038
2039public:
2040 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2041
2042 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2043
2044protected:
2045 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2046};
2047
2048} // end anonymous namespace
2049
2050namespace llvm {
2051
2052std::unique_ptr<ScheduleDAGMutation>
2053createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2054                               const TargetRegisterInfo *TRI) {
2055 return std::make_unique<CopyConstrain>(TII, TRI);
2056}
2057
2058} // end namespace llvm
2059
2060/// constrainLocalCopy handles two possibilities:
2061/// 1) Local src:
2062/// I0: = dst
2063/// I1: src = ...
2064/// I2: = dst
2065/// I3: dst = src (copy)
2066/// (create pred->succ edges I0->I1, I2->I1)
2067///
2068/// 2) Local copy:
2069/// I0: dst = src (copy)
2070/// I1: = dst
2071/// I2: src = ...
2072/// I3: = dst
2073/// (create pred->succ edges I1->I2, I3->I2)
2074///
2075/// Although the MachineScheduler is currently constrained to single blocks,
2076/// this algorithm should handle extended blocks. An EBB is a set of
2077/// contiguously numbered blocks such that the previous block in the EBB is
2078/// always the single predecessor.
2079void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2080 LiveIntervals *LIS = DAG->getLIS();
2081 MachineInstr *Copy = CopySU->getInstr();
2082
2083 // Check for pure vreg copies.
2084 const MachineOperand &SrcOp = Copy->getOperand(1);
2085 Register SrcReg = SrcOp.getReg();
2086 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2087 return;
2088
2089 const MachineOperand &DstOp = Copy->getOperand(0);
2090 Register DstReg = DstOp.getReg();
2091 if (!DstReg.isVirtual() || DstOp.isDead())
2092 return;
2093
2094 // Check if either the dest or source is local. If it's live across a back
2095 // edge, it's not local. Note that if both vregs are live across the back
2096  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2097 // If both the copy's source and dest are local live intervals, then we
2098 // should treat the dest as the global for the purpose of adding
2099 // constraints. This adds edges from source's other uses to the copy.
2100 unsigned LocalReg = SrcReg;
2101 unsigned GlobalReg = DstReg;
2102 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2103 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2104 LocalReg = DstReg;
2105 GlobalReg = SrcReg;
2106 LocalLI = &LIS->getInterval(LocalReg);
2107 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2108 return;
2109 }
2110 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2111
2112 // Find the global segment after the start of the local LI.
2113 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2114 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2115 // local live range. We could create edges from other global uses to the local
2116 // start, but the coalescer should have already eliminated these cases, so
2117 // don't bother dealing with it.
2118 if (GlobalSegment == GlobalLI->end())
2119 return;
2120
2121 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2122 // returned the next global segment. But if GlobalSegment overlaps with
2123 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2124 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2125 if (GlobalSegment->contains(LocalLI->beginIndex()))
2126 ++GlobalSegment;
2127
2128 if (GlobalSegment == GlobalLI->end())
2129 return;
2130
2131 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2132 if (GlobalSegment != GlobalLI->begin()) {
2133 // Two address defs have no hole.
2134 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2135 GlobalSegment->start)) {
2136 return;
2137 }
2138 // If the prior global segment may be defined by the same two-address
2139 // instruction that also defines LocalLI, then can't make a hole here.
2140 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2141 LocalLI->beginIndex())) {
2142 return;
2143 }
2144 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2145 // it would be a disconnected component in the live range.
2146 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2147 "Disconnected LRG within the scheduling region.");
2148 }
2149 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2150 if (!GlobalDef)
2151 return;
2152
2153 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2154 if (!GlobalSU)
2155 return;
2156
2157 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2158 // constraining the uses of the last local def to precede GlobalDef.
2159 SmallVector<SUnit*,8> LocalUses;
2160 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2161 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2162 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2163 for (const SDep &Succ : LastLocalSU->Succs) {
2164 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2165 continue;
2166 if (Succ.getSUnit() == GlobalSU)
2167 continue;
2168 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2169 return;
2170 LocalUses.push_back(Succ.getSUnit());
2171 }
2172 // Open the top of the GlobalLI hole by constraining any earlier global uses
2173 // to precede the start of LocalLI.
2174 SmallVector<SUnit*,8> GlobalUses;
2175 MachineInstr *FirstLocalDef =
2176 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2177 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2178 for (const SDep &Pred : GlobalSU->Preds) {
2179 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2180 continue;
2181 if (Pred.getSUnit() == FirstLocalSU)
2182 continue;
2183 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2184 return;
2185 GlobalUses.push_back(Pred.getSUnit());
2186 }
2187 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2188 // Add the weak edges.
2189 for (SUnit *LU : LocalUses) {
2190 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2191 << GlobalSU->NodeNum << ")\n");
2192 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2193 }
2194 for (SUnit *GU : GlobalUses) {
2195 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2196 << FirstLocalSU->NodeNum << ")\n");
2197 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2198 }
2199}
2200
2201/// Callback from DAG postProcessing to create weak edges to encourage
2202/// copy elimination.
2203void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2204 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2205 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2206
2207 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2208 if (FirstPos == DAG->end())
2209 return;
2210 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2211 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2212 *priorNonDebug(DAG->end(), DAG->begin()));
2213
2214 for (SUnit &SU : DAG->SUnits) {
2215 if (!SU.getInstr()->isCopy())
2216 continue;
2217
2218 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2219 }
2220}
2221
2222//===----------------------------------------------------------------------===//
2223// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2224// and possibly other custom schedulers.
2225//===----------------------------------------------------------------------===//
2226
2227static const unsigned InvalidCycle = ~0U;
2228
2229SchedBoundary::~SchedBoundary() { delete HazardRec; }
2230
2231/// Given a Count of resource usage and a Latency value, return true if a
2232/// SchedBoundary becomes resource limited.
2233/// If we are checking after scheduling a node, we should return true when
2234/// we just reach the resource limit.
2235static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2236 unsigned Latency, bool AfterSchedNode) {
2237 int ResCntFactor = (int)(Count - (Latency * LFactor));
2238 if (AfterSchedNode)
2239 return ResCntFactor >= (int)LFactor;
2240 else
2241 return ResCntFactor > (int)LFactor;
2242}
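// Worked example (hypothetical numbers): with LFactor = 2, Count = 10 and
// Latency = 4, ResCntFactor = 10 - 4 * 2 = 2. After scheduling a node
// (AfterSchedNode == true) the zone counts as resource limited because
// 2 >= 2; before scheduling it would not, since 2 > 2 is false.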
2243
2244void SchedBoundary::reset() {
2245  // A new HazardRec is created for each DAG and owned by SchedBoundary.
2246 // Destroying and reconstructing it is very expensive though. So keep
2247 // invalid, placeholder HazardRecs.
2248 if (HazardRec && HazardRec->isEnabled()) {
2249 delete HazardRec;
2250 HazardRec = nullptr;
2251 }
2252 Available.clear();
2253 Pending.clear();
2254 CheckPending = false;
2255 CurrCycle = 0;
2256 CurrMOps = 0;
2257 MinReadyCycle = std::numeric_limits<unsigned>::max();
2258 ExpectedLatency = 0;
2259 DependentLatency = 0;
2260 RetiredMOps = 0;
2261 MaxExecutedResCount = 0;
2262 ZoneCritResIdx = 0;
2263 IsResourceLimited = false;
2264 ReservedCycles.clear();
2265 ReservedResourceSegments.clear();
2266 ReservedCyclesIndex.clear();
2267 ResourceGroupSubUnitMasks.clear();
2268#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2269 // Track the maximum number of stall cycles that could arise either from the
2270 // latency of a DAG edge or the number of cycles that a processor resource is
2271 // reserved (SchedBoundary::ReservedCycles).
2272 MaxObservedStall = 0;
2273#endif
2274 // Reserve a zero-count for invalid CritResIdx.
2275 ExecutedResCounts.resize(1);
2276 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2277}
2278
2279void SchedRemainder::
2280init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2281 reset();
2282 if (!SchedModel->hasInstrSchedModel())
2283 return;
2284  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2285  for (SUnit &SU : DAG->SUnits) {
2286 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2287 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2288 * SchedModel->getMicroOpFactor();
2289    for (TargetSchedModel::ProcResIter
2290           PI = SchedModel->getWriteProcResBegin(SC),
2291 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2292 unsigned PIdx = PI->ProcResourceIdx;
2293 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2294 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2295 RemainingCounts[PIdx] +=
2296 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2297 }
2298 }
2299}
2300
2301void SchedBoundary::
2302init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2303 reset();
2304 DAG = dag;
2305 SchedModel = smodel;
2306 Rem = rem;
2307  if (SchedModel->hasInstrSchedModel()) {
2308    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2309 ReservedCyclesIndex.resize(ResourceCount);
2310 ExecutedResCounts.resize(ResourceCount);
2311 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2312 unsigned NumUnits = 0;
2313
2314 for (unsigned i = 0; i < ResourceCount; ++i) {
2315 ReservedCyclesIndex[i] = NumUnits;
2316 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2317 if (isUnbufferedGroup(i)) {
2318 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2319 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2320 U != UE; ++U)
2321 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2322 }
2323 }
2324
2325 ReservedCycles.resize(NumUnits, InvalidCycle);
2326 }
2327}
2328
2329/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2330/// these "soft stalls" differently than the hard stall cycles based on CPU
2331/// resources and computed by checkHazard(). A fully in-order model
2332/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2333/// available for scheduling until they are ready. However, a weaker in-order
2334/// model may use this for heuristics. For example, if a processor has in-order
2335/// behavior when reading certain resources, this may come into play.
2336unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2337  if (!SU->isUnbuffered)
2338 return 0;
2339
2340 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2341 if (ReadyCycle > CurrCycle)
2342 return ReadyCycle - CurrCycle;
2343 return 0;
2344}
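// For example (hypothetical cycles): an unbuffered SU whose TopReadyCycle is
// 7 while CurrCycle is 5 reports a 2-cycle soft stall; a buffered SU always
// reports 0 and is handled by checkHazard() and the latency heuristics alone.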
2345
2346/// Compute the next cycle at which the given processor resource unit
2347/// can be scheduled.
2348unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2349                                                       unsigned ReleaseAtCycle,
2350                                                       unsigned AcquireAtCycle) {
2351  if (SchedModel && SchedModel->enableIntervals()) {
2352    if (isTop())
2353 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2354 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2355
2356 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2357 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2358 }
2359
2360 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2361 // If this resource has never been used, always return cycle zero.
2362 if (NextUnreserved == InvalidCycle)
2363 return CurrCycle;
2364 // For bottom-up scheduling add the cycles needed for the current operation.
2365 if (!isTop())
2366 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2367 return NextUnreserved;
2368}
2369
2370/// Compute the next cycle at which the given processor resource can be
2371/// scheduled. Returns the next cycle and the index of the processor resource
2372/// instance in the reserved cycles vector.
2373std::pair<unsigned, unsigned>
2374SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2375                                    unsigned ReleaseAtCycle,
2376                                    unsigned AcquireAtCycle) {
2377  if (MischedDetailResourceBooking) {
2378    LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");
2379    LLVM_DEBUG(dumpReservedCycles());
2380    LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");
2381 }
2382 unsigned MinNextUnreserved = InvalidCycle;
2383 unsigned InstanceIdx = 0;
2384 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2385 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2386 assert(NumberOfInstances > 0 &&
2387 "Cannot have zero instances of a ProcResource");
2388
2389 if (isUnbufferedGroup(PIdx)) {
2390 // If any subunits are used by the instruction, report that the
2391 // subunits of the resource group are available at the first cycle
2392 // in which the unit is available, effectively removing the group
2393 // record from hazarding and basing the hazarding decisions on the
2394 // subunit records. Otherwise, choose the first available instance
2395 // from among the subunits. Specifications which assign cycles to
2396 // both the subunits and the group or which use an unbuffered
2397 // group with buffered subunits will appear to schedule
2398 // strangely. In the first case, the additional cycles for the
2399 // group will be ignored. In the second, the group will be
2400 // ignored entirely.
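    // Illustrative scenario (hypothetical model): for an unbuffered group
    // G0 = {U0, U1}, an instruction that also names U0 explicitly is hazarded
    // on the subunit records, while an instruction that names only G0 is
    // steered to whichever of U0/U1 becomes free first.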
2401    for (const MCWriteProcResEntry &PE :
2402         make_range(SchedModel->getWriteProcResBegin(SC),
2403                    SchedModel->getWriteProcResEnd(SC)))
2404      if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2405 return std::make_pair(getNextResourceCycleByInstance(
2406 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2407 StartIndex);
2408
2409 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2410 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2411 unsigned NextUnreserved, NextInstanceIdx;
2412 std::tie(NextUnreserved, NextInstanceIdx) =
2413 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2414 if (MinNextUnreserved > NextUnreserved) {
2415 InstanceIdx = NextInstanceIdx;
2416 MinNextUnreserved = NextUnreserved;
2417 }
2418 }
2419 return std::make_pair(MinNextUnreserved, InstanceIdx);
2420 }
2421
2422 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2423 ++I) {
2424 unsigned NextUnreserved =
2425 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2426    if (MischedDetailResourceBooking)
2427      LLVM_DEBUG(dbgs() << "    Instance " << I - StartIndex << " available @"
2428 << NextUnreserved << "c\n");
2429 if (MinNextUnreserved > NextUnreserved) {
2430 InstanceIdx = I;
2431 MinNextUnreserved = NextUnreserved;
2432 }
2433 }
2434  if (MischedDetailResourceBooking)
2435    LLVM_DEBUG(dbgs() << "    selecting " << SchedModel->getResourceName(PIdx)
2436 << "[" << InstanceIdx - StartIndex << "]"
2437 << " available @" << MinNextUnreserved << "c"
2438 << "\n");
2439 return std::make_pair(MinNextUnreserved, InstanceIdx);
2440}
2441
2442/// Does this SU have a hazard within the current instruction group.
2443///
2444/// The scheduler supports two modes of hazard recognition. The first is the
2445/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2446/// supports highly complicated in-order reservation tables
2447/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2448///
2449/// The second is a streamlined mechanism that checks for hazards based on
2450/// simple counters that the scheduler itself maintains. It explicitly checks
2451/// for instruction dispatch limitations, including the number of micro-ops that
2452/// can dispatch per cycle.
2453///
2454/// TODO: Also check whether the SU must start a new group.
2455bool SchedBoundary::checkHazard(SUnit *SU) {
2456  if (HazardRec->isEnabled()
2457      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2458    return true;
2459 }
2460
2461 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2462 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2463 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2464 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2465 return true;
2466 }
2467
2468 if (CurrMOps > 0 &&
2469 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2470 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2471 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2472 << (isTop() ? "begin" : "end") << " group\n");
2473 return true;
2474 }
2475
2476  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2477    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2478    for (const MCWriteProcResEntry &PE :
2479          make_range(SchedModel->getWriteProcResBegin(SC),
2480                     SchedModel->getWriteProcResEnd(SC))) {
2481      unsigned ResIdx = PE.ProcResourceIdx;
2482 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2483 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2484 unsigned NRCycle, InstanceIdx;
2485 std::tie(NRCycle, InstanceIdx) =
2486 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2487 if (NRCycle > CurrCycle) {
2488#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2489 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2490#endif
2491 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2492 << SchedModel->getResourceName(ResIdx)
2493 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2494 << "=" << NRCycle << "c\n");
2495 return true;
2496 }
2497 }
2498 }
2499 return false;
2500}
2501
2502// Find the unscheduled node in ReadySUs with the highest latency.
2503unsigned SchedBoundary::
2504findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2505  SUnit *LateSU = nullptr;
2506 unsigned RemLatency = 0;
2507 for (SUnit *SU : ReadySUs) {
2508 unsigned L = getUnscheduledLatency(SU);
2509 if (L > RemLatency) {
2510 RemLatency = L;
2511 LateSU = SU;
2512 }
2513 }
2514 if (LateSU) {
2515 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2516 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2517 }
2518 return RemLatency;
2519}
2520
2521// Count resources in this zone and the remaining unscheduled
2522// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2523// resource index, or zero if the zone is issue limited.
2524unsigned SchedBoundary::
2525getOtherResourceCount(unsigned &OtherCritIdx) {
2526 OtherCritIdx = 0;
2527  if (!SchedModel->hasInstrSchedModel())
2528    return 0;
2529
2530 unsigned OtherCritCount = Rem->RemIssueCount
2531 + (RetiredMOps * SchedModel->getMicroOpFactor());
2532 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2533 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2534 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2535 PIdx != PEnd; ++PIdx) {
2536 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2537 if (OtherCount > OtherCritCount) {
2538 OtherCritCount = OtherCount;
2539 OtherCritIdx = PIdx;
2540 }
2541 }
2542 if (OtherCritIdx) {
2543 LLVM_DEBUG(
2544 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2545 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2546 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2547 }
2548 return OtherCritCount;
2549}
2550
2551void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2552 unsigned Idx) {
2553 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2554
2555#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2556  // ReadyCycle has been bumped up to the CurrCycle when this node was
2557 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2558 // scheduling, so may now be greater than ReadyCycle.
2559 if (ReadyCycle > CurrCycle)
2560 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2561#endif
2562
2563 if (ReadyCycle < MinReadyCycle)
2564 MinReadyCycle = ReadyCycle;
2565
2566 // Check for interlocks first. For the purpose of other heuristics, an
2567 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2568 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2569 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2570                        checkHazard(SU) || (Available.size() >= ReadyListLimit);
2571
2572 if (!HazardDetected) {
2573 Available.push(SU);
2574
2575 if (InPQueue)
2576      Pending.remove(Pending.begin() + Idx);
2577    return;
2578 }
2579
2580 if (!InPQueue)
2581 Pending.push(SU);
2582}
2583
2584/// Move the boundary of scheduled code by one cycle.
2585void SchedBoundary::bumpCycle(unsigned NextCycle) {
2586 if (SchedModel->getMicroOpBufferSize() == 0) {
2587 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2588 "MinReadyCycle uninitialized");
2589 if (MinReadyCycle > NextCycle)
2590 NextCycle = MinReadyCycle;
2591 }
2592 // Update the current micro-ops, which will issue in the next cycle.
2593 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2594 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
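  // For example (hypothetical widths): with an issue width of 4, advancing
  // from cycle 10 to cycle 12 allows up to 8 micro-ops to issue, so a CurrMOps
  // of 5 drops to 0 rather than going negative.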
2595
2596 // Decrement DependentLatency based on the next cycle.
2597 if ((NextCycle - CurrCycle) > DependentLatency)
2598 DependentLatency = 0;
2599 else
2600 DependentLatency -= (NextCycle - CurrCycle);
2601
2602 if (!HazardRec->isEnabled()) {
2603 // Bypass HazardRec virtual calls.
2604 CurrCycle = NextCycle;
2605 } else {
2606 // Bypass getHazardType calls in case of long latency.
2607 for (; CurrCycle != NextCycle; ++CurrCycle) {
2608      if (isTop())
2609        HazardRec->AdvanceCycle();
2610      else
2611        HazardRec->RecedeCycle();
2612 }
2613 }
2614 CheckPending = true;
2615 IsResourceLimited =
2616      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2617                         getScheduledLatency(), true);
2618
2619 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2620 << '\n');
2621}
2622
2623void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2624 ExecutedResCounts[PIdx] += Count;
2625 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2626 MaxExecutedResCount = ExecutedResCounts[PIdx];
2627}
2628
2629/// Add the given processor resource to this scheduled zone.
2630///
2631/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2632/// cycles during which this resource is released.
2633///
2634/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2635/// cycles at which the resource is acquired after issue (assuming no stalls).
2636///
2637/// \return the next cycle at which the instruction may execute without
2638/// oversubscribing resources.
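// For example (hypothetical values): a resource with factor 2 written with
// ReleaseAtCycle = 3 and AcquireAtCycle = 1 contributes Count = 2 * (3 - 1)
// = 4 scaled units to ExecutedResCounts and to the critical-resource
// comparison below.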
2639unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2640 unsigned ReleaseAtCycle,
2641 unsigned NextCycle,
2642 unsigned AcquireAtCycle) {
2643 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2644  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2645 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2646 << ReleaseAtCycle << "x" << Factor << "u\n");
2647
2648 // Update Executed resources counts.
2649 incExecutedResources(PIdx, Count);
2650 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2651 Rem->RemainingCounts[PIdx] -= Count;
2652
2653 // Check if this resource exceeds the current critical resource. If so, it
2654 // becomes the critical resource.
2655 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2656 ZoneCritResIdx = PIdx;
2657 LLVM_DEBUG(dbgs() << " *** Critical resource "
2658 << SchedModel->getResourceName(PIdx) << ": "
2659                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2660                      << "c\n");
2661 }
2662 // For reserved resources, record the highest cycle using the resource.
2663 unsigned NextAvailable, InstanceIdx;
2664 std::tie(NextAvailable, InstanceIdx) =
2665 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2666 if (NextAvailable > CurrCycle) {
2667 LLVM_DEBUG(dbgs() << " Resource conflict: "
2668 << SchedModel->getResourceName(PIdx)
2669 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2670 << " reserved until @" << NextAvailable << "\n");
2671 }
2672 return NextAvailable;
2673}
2674
2675/// Move the boundary of scheduled code by one SUnit.
2676void SchedBoundary::bumpNode(SUnit *SU) {
2677  // Update the reservation table.
2678 if (HazardRec->isEnabled()) {
2679 if (!isTop() && SU->isCall) {
2680 // Calls are scheduled with their preceding instructions. For bottom-up
2681 // scheduling, clear the pipeline state before emitting.
2682 HazardRec->Reset();
2683 }
2684    HazardRec->EmitInstruction(SU);
2685    // Scheduling an instruction may have made pending instructions available.
2686 CheckPending = true;
2687 }
2688 // checkHazard should prevent scheduling multiple instructions per cycle that
2689 // exceed the issue width.
2690 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2691 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2692 assert(
2693 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2694 "Cannot schedule this instruction's MicroOps in the current cycle.");
2695
2696 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2697 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2698
2699 unsigned NextCycle = CurrCycle;
2700 switch (SchedModel->getMicroOpBufferSize()) {
2701 case 0:
2702 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2703 break;
2704 case 1:
2705 if (ReadyCycle > NextCycle) {
2706 NextCycle = ReadyCycle;
2707 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2708 }
2709 break;
2710 default:
2711 // We don't currently model the OOO reorder buffer, so consider all
2712 // scheduled MOps to be "retired". We do loosely model in-order resource
2713 // latency. If this instruction uses an in-order resource, account for any
2714 // likely stall cycles.
2715 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2716 NextCycle = ReadyCycle;
2717 break;
2718 }
2719 RetiredMOps += IncMOps;
2720
2721 // Update resource counts and critical resource.
2722  if (SchedModel->hasInstrSchedModel()) {
2723    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2724 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2725 Rem->RemIssueCount -= DecRemIssue;
2726 if (ZoneCritResIdx) {
2727 // Scale scheduled micro-ops for comparing with the critical resource.
2728 unsigned ScaledMOps =
2729 RetiredMOps * SchedModel->getMicroOpFactor();
2730
2731 // If scaled micro-ops are now more than the previous critical resource by
2732 // a full cycle, then micro-ops issue becomes critical.
2733 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2734 >= (int)SchedModel->getLatencyFactor()) {
2735 ZoneCritResIdx = 0;
2736 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2737 << ScaledMOps / SchedModel->getLatencyFactor()
2738 << "c\n");
2739 }
2740 }
2741    for (TargetSchedModel::ProcResIter
2742           PI = SchedModel->getWriteProcResBegin(SC),
2743           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2744 unsigned RCycle =
2745 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2746 PI->AcquireAtCycle);
2747 if (RCycle > NextCycle)
2748 NextCycle = RCycle;
2749 }
2750 if (SU->hasReservedResource) {
2751 // For reserved resources, record the highest cycle using the resource.
2752 // For top-down scheduling, this is the cycle in which we schedule this
2753      // instruction plus the number of cycles the operation reserves the
2754      // resource. For bottom-up, it is simply the instruction's cycle.
2755      for (TargetSchedModel::ProcResIter
2756             PI = SchedModel->getWriteProcResBegin(SC),
2757             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2758 unsigned PIdx = PI->ProcResourceIdx;
2759 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2760
2761          if (SchedModel && SchedModel->enableIntervals()) {
2762            unsigned ReservedUntil, InstanceIdx;
2763            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2764                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2765            if (isTop()) {
2766              ReservedResourceSegments[InstanceIdx].add(
2767                  ResourceSegments::getResourceIntervalTop(
2768                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2769                  MIResourceCutOff);
2770            } else {
2771              ReservedResourceSegments[InstanceIdx].add(
2772                  ResourceSegments::getResourceIntervalBottom(
2773                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2774                  MIResourceCutOff);
2775            }
2776 } else {
2777
2778 unsigned ReservedUntil, InstanceIdx;
2779 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2780 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2781 if (isTop()) {
2782 ReservedCycles[InstanceIdx] =
2783 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2784 } else
2785 ReservedCycles[InstanceIdx] = NextCycle;
2786 }
2787 }
2788 }
2789 }
2790 }
2791 // Update ExpectedLatency and DependentLatency.
2792 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2793 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2794 if (SU->getDepth() > TopLatency) {
2795 TopLatency = SU->getDepth();
2796 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2797 << SU->NodeNum << ") " << TopLatency << "c\n");
2798 }
2799 if (SU->getHeight() > BotLatency) {
2800 BotLatency = SU->getHeight();
2801 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2802 << SU->NodeNum << ") " << BotLatency << "c\n");
2803 }
2804 // If we stall for any reason, bump the cycle.
2805 if (NextCycle > CurrCycle)
2806 bumpCycle(NextCycle);
2807 else
2808 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2809 // resource limited. If a stall occurred, bumpCycle does this.
2810 IsResourceLimited =
2811        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2812                           getScheduledLatency(), true);
2813
2814 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2815 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2816 // one cycle. Since we commonly reach the max MOps here, opportunistically
2817 // bump the cycle to avoid uselessly checking everything in the readyQ.
2818 CurrMOps += IncMOps;
2819
2820 // Bump the cycle count for issue group constraints.
2821  // This must be done after NextCycle has been adjusted for all other stalls.
2822 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2823 // currCycle to X.
2824 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2825 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2826 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2827 << " group\n");
2828 bumpCycle(++NextCycle);
2829 }
2830
2831 while (CurrMOps >= SchedModel->getIssueWidth()) {
2832 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2833 << CurrCycle << '\n');
2834 bumpCycle(++NextCycle);
2835 }
2836  LLVM_DEBUG(dumpScheduledState());
2837}
2838
2839/// Release pending ready nodes in to the available queue. This makes them
2840/// visible to heuristics.
2841void SchedBoundary::releasePending() {
2842  // If the available queue is empty, it is safe to reset MinReadyCycle.
2843 if (Available.empty())
2844 MinReadyCycle = std::numeric_limits<unsigned>::max();
2845
2846 // Check to see if any of the pending instructions are ready to issue. If
2847 // so, add them to the available queue.
2848 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2849 SUnit *SU = *(Pending.begin() + I);
2850 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2851
2852 if (ReadyCycle < MinReadyCycle)
2853 MinReadyCycle = ReadyCycle;
2854
2855 if (Available.size() >= ReadyListLimit)
2856 break;
2857
2858 releaseNode(SU, ReadyCycle, true, I);
2859 if (E != Pending.size()) {
2860 --I;
2861 --E;
2862 }
2863 }
2864 CheckPending = false;
2865}
2866
2867/// Remove SU from the ready set for this boundary.
2868void SchedBoundary::removeReady(SUnit *SU) {
2869  if (Available.isInQueue(SU))
2870    Available.remove(Available.find(SU));
2871  else {
2872    assert(Pending.isInQueue(SU) && "bad ready count");
2873    Pending.remove(Pending.find(SU));
2874  }
2875}
2876
2877/// If this queue only has one ready candidate, return it. As a side effect,
2878/// defer any nodes that now hit a hazard, and advance the cycle until at least
2879/// one node is ready. If multiple instructions are ready, return NULL.
2880SUnit *SchedBoundary::pickOnlyChoice() {
2881  if (CheckPending)
2882    releasePending();
2883
2884  // Defer any ready instrs that now have a hazard.
2885  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2886    if (checkHazard(*I)) {
2887 Pending.push(*I);
2888 I = Available.remove(I);
2889 continue;
2890 }
2891 ++I;
2892 }
2893 for (unsigned i = 0; Available.empty(); ++i) {
2894// FIXME: Re-enable assert once PR20057 is resolved.
2895// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2896// "permanent hazard");
2897 (void)i;
2898 bumpCycle(CurrCycle + 1);
2899    releasePending();
2900  }
2901
2902  LLVM_DEBUG(Pending.dump());
2903  LLVM_DEBUG(Available.dump());
2904
2905 if (Available.size() == 1)
2906 return *Available.begin();
2907 return nullptr;
2908}
2909
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911
2912/// Dump the content of the \ref ReservedCycles vector for the
2913/// resources that are used in the basic block.
2914///
2915LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2916  if (!SchedModel->hasInstrSchedModel())
2917    return;
2918
2919 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2920 unsigned StartIdx = 0;
2921
2922 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2923 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2924 std::string ResName = SchedModel->getResourceName(ResIdx);
2925 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2926 dbgs() << ResName << "(" << UnitIdx << ") = ";
2927      if (SchedModel && SchedModel->enableIntervals()) {
2928        if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2929 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2930 else
2931 dbgs() << "{ }\n";
2932 } else
2933 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2934 }
2935 StartIdx += NumUnits;
2936 }
2937}
2938
2939// This is useful information to dump after bumpNode.
2940// Note that the Queue contents are more useful before pickNodeFromQueue.
2941LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2942  unsigned ResFactor;
2943 unsigned ResCount;
2944 if (ZoneCritResIdx) {
2945 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2946 ResCount = getResourceCount(ZoneCritResIdx);
2947 } else {
2948 ResFactor = SchedModel->getMicroOpFactor();
2949 ResCount = RetiredMOps * ResFactor;
2950 }
2951 unsigned LFactor = SchedModel->getLatencyFactor();
2952 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2953 << " Retired: " << RetiredMOps;
2954 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2955 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2956 << ResCount / ResFactor << " "
2957 << SchedModel->getResourceName(ZoneCritResIdx)
2958 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2959 << (IsResourceLimited ? " - Resource" : " - Latency")
2960 << " limited.\n";
2961  if (MISchedDumpReservedCycles)
2962    dumpReservedCycles();
2963}
2964#endif
2965
2966//===----------------------------------------------------------------------===//
2967// GenericScheduler - Generic implementation of MachineSchedStrategy.
2968//===----------------------------------------------------------------------===//
2969
2970void GenericSchedulerBase::SchedCandidate::
2971initResourceDelta(const ScheduleDAGMI *DAG,
2972                  const TargetSchedModel *SchedModel) {
2973  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2974    return;
2975
2976 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2977  for (TargetSchedModel::ProcResIter
2978         PI = SchedModel->getWriteProcResBegin(SC),
2979         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2980 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2981 ResDelta.CritResources += PI->ReleaseAtCycle;
2982 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2983 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2984 }
2985}
2986
2987/// Compute remaining latency. We need this both to determine whether the
2988/// overall schedule has become latency-limited and whether the instructions
2989/// outside this zone are resource or latency limited.
2990///
2991/// The "dependent" latency is updated incrementally during scheduling as the
2992/// max height/depth of scheduled nodes minus the cycles since it was
2993/// scheduled:
2994/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2995///
2996/// The "independent" latency is the max ready queue depth:
2997/// ILat = max N.depth for N in Available|Pending
2998///
2999/// RemainingLatency is the greater of independent and dependent latency.
3000///
3001/// These computations are expensive, especially in DAGs with many edges, so
3002/// only do them if necessary.
3003static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3004 unsigned RemLatency = CurrZone.getDependentLatency();
3005 RemLatency = std::max(RemLatency,
3006 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3007 RemLatency = std::max(RemLatency,
3008 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3009 return RemLatency;
3010}
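// For example (hypothetical latencies): if the zone's dependent latency is 6
// but an available node still has an unscheduled height/depth of 9, the
// remaining latency reported to setPolicy() is 9.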
3011
3012/// Returns true if the current cycle plus remaining latency is greater than
3013/// the critical path in the scheduling region.
3014bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3015 SchedBoundary &CurrZone,
3016 bool ComputeRemLatency,
3017 unsigned &RemLatency) const {
3018 // The current cycle is already greater than the critical path, so we are
3019 // already latency limited and don't need to compute the remaining latency.
3020 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3021 return true;
3022
3023 // If we haven't scheduled anything yet, then we aren't latency limited.
3024 if (CurrZone.getCurrCycle() == 0)
3025 return false;
3026
3027 if (ComputeRemLatency)
3028 RemLatency = computeRemLatency(CurrZone);
3029
3030 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3031}
3032
3033/// Set the CandPolicy for a scheduling zone given the current resources and
3034/// latencies inside and outside the zone.
3035void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3036                                     SchedBoundary &CurrZone,
3037 SchedBoundary *OtherZone) {
3038 // Apply preemptive heuristics based on the total latency and resources
3039 // inside and outside this zone. Potential stalls should be considered before
3040 // following this policy.
3041
3042 // Compute the critical resource outside the zone.
3043 unsigned OtherCritIdx = 0;
3044 unsigned OtherCount =
3045 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3046
3047 bool OtherResLimited = false;
3048 unsigned RemLatency = 0;
3049 bool RemLatencyComputed = false;
3050 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3051 RemLatency = computeRemLatency(CurrZone);
3052 RemLatencyComputed = true;
3053 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3054 OtherCount, RemLatency, false);
3055 }
3056
3057 // Schedule aggressively for latency in PostRA mode. We don't check for
3058 // acyclic latency during PostRA, and highly out-of-order processors will
3059 // skip PostRA scheduling.
3060 if (!OtherResLimited &&
3061 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3062 RemLatency))) {
3063 Policy.ReduceLatency |= true;
3064 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3065 << " RemainingLatency " << RemLatency << " + "
3066 << CurrZone.getCurrCycle() << "c > CritPath "
3067 << Rem.CriticalPath << "\n");
3068 }
3069 // If the same resource is limiting inside and outside the zone, do nothing.
3070 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3071 return;
3072
3073 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3074 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3075 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3076 } if (OtherResLimited) dbgs()
3077 << " RemainingLimit: "
3078 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3079 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3080 << " Latency limited both directions.\n");
3081
3082 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3083 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3084
3085 if (OtherResLimited)
3086 Policy.DemandResIdx = OtherCritIdx;
3087}
3088
3089#ifndef NDEBUG
3090const char *GenericSchedulerBase::getReasonStr(
3091  GenericSchedulerBase::CandReason Reason) {
3092  switch (Reason) {
3093 case NoCand: return "NOCAND ";
3094 case Only1: return "ONLY1 ";
3095 case PhysReg: return "PHYS-REG ";
3096 case RegExcess: return "REG-EXCESS";
3097 case RegCritical: return "REG-CRIT ";
3098 case Stall: return "STALL ";
3099 case Cluster: return "CLUSTER ";
3100 case Weak: return "WEAK ";
3101 case RegMax: return "REG-MAX ";
3102 case ResourceReduce: return "RES-REDUCE";
3103 case ResourceDemand: return "RES-DEMAND";
3104 case TopDepthReduce: return "TOP-DEPTH ";
3105 case TopPathReduce: return "TOP-PATH ";
3106 case BotHeightReduce:return "BOT-HEIGHT";
3107 case BotPathReduce: return "BOT-PATH ";
3108 case NextDefUse: return "DEF-USE ";
3109 case NodeOrder: return "ORDER ";
3110 };
3111 llvm_unreachable("Unknown reason!");
3112}
3113
3114void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3115  PressureChange P;
3116  unsigned ResIdx = 0;
3117 unsigned Latency = 0;
3118 switch (Cand.Reason) {
3119 default:
3120 break;
3121 case RegExcess:
3122 P = Cand.RPDelta.Excess;
3123 break;
3124 case RegCritical:
3125 P = Cand.RPDelta.CriticalMax;
3126 break;
3127 case RegMax:
3128 P = Cand.RPDelta.CurrentMax;
3129 break;
3130 case ResourceReduce:
3131 ResIdx = Cand.Policy.ReduceResIdx;
3132 break;
3133 case ResourceDemand:
3134 ResIdx = Cand.Policy.DemandResIdx;
3135 break;
3136 case TopDepthReduce:
3137 Latency = Cand.SU->getDepth();
3138 break;
3139 case TopPathReduce:
3140 Latency = Cand.SU->getHeight();
3141 break;
3142 case BotHeightReduce:
3143 Latency = Cand.SU->getHeight();
3144 break;
3145 case BotPathReduce:
3146 Latency = Cand.SU->getDepth();
3147 break;
3148 }
3149 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3150 if (P.isValid())
3151 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3152 << ":" << P.getUnitInc() << " ";
3153 else
3154 dbgs() << " ";
3155 if (ResIdx)
3156 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3157 else
3158 dbgs() << " ";
3159 if (Latency)
3160 dbgs() << " " << Latency << " cycles ";
3161 else
3162 dbgs() << " ";
3163 dbgs() << '\n';
3164}
3165#endif
3166
3167namespace llvm {
3168/// Return true if this heuristic determines order.
3169/// TODO: Consider refactoring the return type of these functions to an integer
3170/// or enum, as we may need to differentiate whether TryCand is better than Cand.
3171bool tryLess(int TryVal, int CandVal,
3172             GenericSchedulerBase::SchedCandidate &TryCand,
3173             GenericSchedulerBase::SchedCandidate &Cand,
3174             GenericSchedulerBase::CandReason Reason) {
3175 if (TryVal < CandVal) {
3176 TryCand.Reason = Reason;
3177 return true;
3178 }
3179 if (TryVal > CandVal) {
3180 if (Cand.Reason > Reason)
3181 Cand.Reason = Reason;
3182 return true;
3183 }
3184 return false;
3185}
3186
3187bool tryGreater(int TryVal, int CandVal,
3188                GenericSchedulerBase::SchedCandidate &TryCand,
3189                GenericSchedulerBase::SchedCandidate &Cand,
3190                GenericSchedulerBase::CandReason Reason) {
3191 if (TryVal > CandVal) {
3192 TryCand.Reason = Reason;
3193 return true;
3194 }
3195 if (TryVal < CandVal) {
3196 if (Cand.Reason > Reason)
3197 Cand.Reason = Reason;
3198 return true;
3199 }
3200 return false;
3201}
3202
3203bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3204                GenericSchedulerBase::SchedCandidate &Cand,
3205                SchedBoundary &Zone) {
3206 if (Zone.isTop()) {
3207 // Prefer the candidate with the lesser depth, but only if one of them has
3208 // depth greater than the total latency scheduled so far, otherwise either
3209 // of them could be scheduled now with no stall.
3210 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3211 Zone.getScheduledLatency()) {
3212 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3213                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3214        return true;
3215 }
3216 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3217                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3218      return true;
3219 } else {
3220 // Prefer the candidate with the lesser height, but only if one of them has
3221 // height greater than the total latency scheduled so far, otherwise either
3222 // of them could be scheduled now with no stall.
3223 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3224 Zone.getScheduledLatency()) {
3225 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3226                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3227        return true;
3228 }
3229 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3230                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3231      return true;
3232 }
3233 return false;
3234}
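// For illustration (hypothetical depths): in a top zone with scheduled
// latency 5, a TryCand of depth 7 loses to a Cand of depth 3 under
// TopDepthReduce; if both depths were <= 5, the comparison would fall through
// to the height check (TopPathReduce) instead.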
3235} // end namespace llvm
3236
3237static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3238 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3239 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3240}
3241
3242static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3243  tracePick(Cand.Reason, Cand.AtTop);
3244}
3245
3246void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3247  assert(dag->hasVRegLiveness() &&
3248 "(PreRA)GenericScheduler needs vreg liveness");
3249 DAG = static_cast<ScheduleDAGMILive*>(dag);
3250 SchedModel = DAG->getSchedModel();
3251 TRI = DAG->TRI;
3252
3253 if (RegionPolicy.ComputeDFSResult)
3254 DAG->computeDFSResult();
3255
3256 Rem.init(DAG, SchedModel);
3257 Top.init(DAG, SchedModel, &Rem);
3258 Bot.init(DAG, SchedModel, &Rem);
3259
3260 // Initialize resource counts.
3261
3262 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3263 // are disabled, then these HazardRecs will be disabled.
3264  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3265  if (!Top.HazardRec) {
3266 Top.HazardRec =
3267        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3268            Itin, DAG);
3269 }
3270 if (!Bot.HazardRec) {
3271 Bot.HazardRec =
3272        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3273            Itin, DAG);
3274 }
3275 TopCand.SU = nullptr;
3276 BotCand.SU = nullptr;
3277}
3278
3279/// Initialize the per-region scheduling policy.
3280void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3281                                  MachineBasicBlock::iterator End,
3282                                  unsigned NumRegionInstrs) {
3283 const MachineFunction &MF = *Begin->getMF();
3284 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3285
3286 // Avoid setting up the register pressure tracker for small regions to save
3287 // compile time. As a rough heuristic, only track pressure when the number of
3288 // schedulable instructions exceeds half the integer register file.
3289 RegionPolicy.ShouldTrackPressure = true;
3290 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
3291    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3292    if (TLI->isTypeLegal(LegalIntVT)) {
3293 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3294 TLI->getRegClassFor(LegalIntVT));
3295 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3296 }
3297 }
3298
3299 // For generic targets, we default to bottom-up, because it's simpler and more
3300 // compile-time optimizations have been implemented in that direction.
3301 RegionPolicy.OnlyBottomUp = true;
3302
3303 // Allow the subtarget to override default policy.
3304 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3305
3306 // After subtarget overrides, apply command line options.
3307 if (!EnableRegPressure) {
3308 RegionPolicy.ShouldTrackPressure = false;
3309 RegionPolicy.ShouldTrackLaneMasks = false;
3310 }
3311
3312 // Check -misched-topdown/bottomup can force or unforce scheduling direction.
3313 // e.g. -misched-bottomup=false allows scheduling in both directions.
3314  assert((!ForceTopDown || !ForceBottomUp) &&
3315         "-misched-topdown incompatible with -misched-bottomup");
3316 if (ForceBottomUp.getNumOccurrences() > 0) {
3317 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3318 if (RegionPolicy.OnlyBottomUp)
3319 RegionPolicy.OnlyTopDown = false;
3320 }
3321 if (ForceTopDown.getNumOccurrences() > 0) {
3322 RegionPolicy.OnlyTopDown = ForceTopDown;
3323 if (RegionPolicy.OnlyTopDown)
3324 RegionPolicy.OnlyBottomUp = false;
3325 }
3326}
3327
3328void GenericScheduler::dumpPolicy() const {
3329  // Cannot completely remove virtual function even in release mode.
3330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3331 dbgs() << "GenericScheduler RegionPolicy: "
3332 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3333 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3334 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3335 << "\n";
3336#endif
3337}
3338
3339/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3340/// critical path by more cycles than it takes to drain the instruction buffer.
3341/// We estimate an upper bound on in-flight instructions as:
3342///
3343/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3344/// InFlightIterations = AcyclicPath / CyclesPerIteration
3345/// InFlightResources = InFlightIterations * LoopResources
3346///
3347/// TODO: Check execution resources in addition to IssueCount.
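// For example (hypothetical numbers, ignoring the scaling factors): with
// CyclicPath = 4, AcyclicPath = 12 and 6 micro-ops per iteration, roughly
// 12 / 4 = 3 iterations are in flight, i.e. about 18 micro-ops; if the
// micro-op buffer holds fewer than that, the loop is flagged as acyclic
// latency limited.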
3348void GenericScheduler::checkAcyclicLatency() {
3349  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3350    return;
3351
3352 // Scaled number of cycles per loop iteration.
3353  unsigned IterCount =
3354    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3355             Rem.RemIssueCount);
3356 // Scaled acyclic critical path.
3357 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3358 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3359 unsigned InFlightCount =
3360 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3361 unsigned BufferLimit =
3363    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3364 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3365
3366 LLVM_DEBUG(
3367 dbgs() << "IssueCycles="
3368           << Rem.RemIssueCount / SchedModel->getIssueWidth() << "c "
3369           << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3370 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3371 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3372 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3373 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3374}
3375
3376void GenericScheduler::registerRoots() {
3377  Rem.CriticalPath = DAG->ExitSU.getDepth();
3378
3379 // Some roots may not feed into ExitSU. Check all of them in case.
3380 for (const SUnit *SU : Bot.Available) {
3381 if (SU->getDepth() > Rem.CriticalPath)
3382 Rem.CriticalPath = SU->getDepth();
3383 }
3384 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3385  if (DumpCriticalPathLength) {
3386    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3387 }
3388
3389  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3390    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3391 checkAcyclicLatency();
3392 }
3393}
3394
3395namespace llvm {
3396bool tryPressure(const PressureChange &TryP,
3397                 const PressureChange &CandP,
3398                 GenericSchedulerBase::SchedCandidate &TryCand,
3399                 GenericSchedulerBase::SchedCandidate &Cand,
3400                 GenericSchedulerBase::CandReason Reason,
3401                 const TargetRegisterInfo *TRI,
3402 const MachineFunction &MF) {
3403 // If one candidate decreases and the other increases, go with it.
3404 // Invalid candidates have UnitInc==0.
3405 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3406 Reason)) {
3407 return true;
3408 }
3409 // Do not compare the magnitude of pressure changes between top and bottom
3410 // boundary.
3411 if (Cand.AtTop != TryCand.AtTop)
3412 return false;
3413
3414 // If both candidates affect the same set in the same boundary, go with the
3415 // smallest increase.
3416 unsigned TryPSet = TryP.getPSetOrMax();
3417 unsigned CandPSet = CandP.getPSetOrMax();
3418 if (TryPSet == CandPSet) {
3419 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3420 Reason);
3421 }
3422
3423 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3424 std::numeric_limits<int>::max();
3425
3426 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3427 std::numeric_limits<int>::max();
3428
3429 // If the candidates are decreasing pressure, reverse priority.
3430 if (TryP.getUnitInc() < 0)
3431 std::swap(TryRank, CandRank);
3432 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3433}
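// For illustration (hypothetical pressure sets): a TryCand that lowers GPR
// pressure by 1 beats a Cand that raises it, regardless of magnitude; only
// when both candidates touch the same set in the same boundary does the
// smaller increase win outright, otherwise the per-set scores break the tie.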
3434
3435unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3436 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3437}
3438
3439/// Minimize physical register live ranges. Regalloc wants them adjacent to
3440/// their physreg def/use.
3441///
3442/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3443/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3444/// with the operation that produces or consumes the physreg. We'll do this when
3445/// regalloc has support for parallel copies.
3446int biasPhysReg(const SUnit *SU, bool isTop) {
3447 const MachineInstr *MI = SU->getInstr();
3448
3449 if (MI->isCopy()) {
3450 unsigned ScheduledOper = isTop ? 1 : 0;
3451 unsigned UnscheduledOper = isTop ? 0 : 1;
3452    // If we have already scheduled the physreg producer/consumer, immediately
3453 // schedule the copy.
3454 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3455 return 1;
3456 // If the physreg is at the boundary, defer it. Otherwise schedule it
3457 // immediately to free the dependent. We can hoist the copy later.
3458 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3459 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3460 return AtBoundary ? -1 : 1;
3461 }
3462
3463 if (MI->isMoveImmediate()) {
3464 // If we have a move immediate and all successors have been assigned, bias
3465 // towards scheduling this later. Make sure all register defs are to
3466 // physical registers.
3467 bool DoBias = true;
3468 for (const MachineOperand &Op : MI->defs()) {
3469 if (Op.isReg() && !Op.getReg().isPhysical()) {
3470 DoBias = false;
3471 break;
3472 }
3473 }
3474
3475 if (DoBias)
3476 return isTop ? -1 : 1;
3477 }
3478
3479 return 0;
3480}
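// For illustration: in tryCandidate() below, only the sign of this bias
// matters -- a COPY whose physreg side is already scheduled yields +1
// (prefer it now), one whose physreg sits at the region boundary yields -1
// (defer it), and 0 leaves the decision to the later heuristics.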
3481} // end namespace llvm
3482
3483void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3484                                     bool AtTop,
3485 const RegPressureTracker &RPTracker,
3486 RegPressureTracker &TempTracker) {
3487 Cand.SU = SU;
3488 Cand.AtTop = AtTop;
3489 if (DAG->isTrackingPressure()) {
3490 if (AtTop) {
3491 TempTracker.getMaxDownwardPressureDelta(
3492 Cand.SU->getInstr(),
3493 Cand.RPDelta,
3494 DAG->getRegionCriticalPSets(),
3495 DAG->getRegPressure().MaxSetPressure);
3496 } else {
3497 if (VerifyScheduling) {
3498 TempTracker.getMaxUpwardPressureDelta(
3499 Cand.SU->getInstr(),
3500 &DAG->getPressureDiff(Cand.SU),
3501 Cand.RPDelta,
3502 DAG->getRegionCriticalPSets(),
3503 DAG->getRegPressure().MaxSetPressure);
3504 } else {
3505 RPTracker.getUpwardPressureDelta(
3506 Cand.SU->getInstr(),
3507 DAG->getPressureDiff(Cand.SU),
3508 Cand.RPDelta,
3509 DAG->getRegionCriticalPSets(),
3510 DAG->getRegPressure().MaxSetPressure);
3511 }
3512 }
3513 }
3514 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3515 << " Try SU(" << Cand.SU->NodeNum << ") "
3516 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3517 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3518}
3519
3520/// Apply a set of heuristics to a new candidate. Heuristics are currently
3521/// hierarchical. This may be more efficient than a graduated cost model because
3522/// we don't need to evaluate all aspects of the model for each node in the
3523/// queue. But it's really done to make the heuristics easier to debug and
3524/// statistically analyze.
3525///
3526/// \param Cand provides the policy and current best candidate.
3527/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3528/// \param Zone describes the scheduled zone that we are extending, or nullptr
3529/// if Cand is from a different zone than TryCand.
3530/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3531bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3532 SchedCandidate &TryCand,
3533 SchedBoundary *Zone) const {
3534 // Initialize the candidate if needed.
3535 if (!Cand.isValid()) {
3536 TryCand.Reason = NodeOrder;
3537 return true;
3538 }
3539
3540 // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3541 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3542 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3543 return TryCand.Reason != NoCand;
3544
3545 // Avoid exceeding the target's limit.
3546 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3547 Cand.RPDelta.Excess,
3548 TryCand, Cand, RegExcess, TRI,
3549 DAG->MF))
3550 return TryCand.Reason != NoCand;
3551
3552 // Avoid increasing the max critical pressure in the scheduled region.
3553 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3554 Cand.RPDelta.CriticalMax,
3555 TryCand, Cand, RegCritical, TRI,
3556 DAG->MF))
3557 return TryCand.Reason != NoCand;
3558
3559 // We only compare a subset of features when comparing nodes between
3560 // Top and Bottom boundary. Some properties are simply incomparable; in many
3561 // other instances we should only override the other boundary if something
3562 // is clearly a good pick on one boundary. Skip heuristics that are more
3563 // "tie-breaking" in nature.
3564 bool SameBoundary = Zone != nullptr;
3565 if (SameBoundary) {
3566 // For loops that are acyclic path limited, aggressively schedule for
3567 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3568 // heuristics to take precedence.
3569 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3570 tryLatency(TryCand, Cand, *Zone))
3571 return TryCand.Reason != NoCand;
3572
3573 // Prioritize instructions that read unbuffered resources by stall cycles.
3574 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3575 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3576 return TryCand.Reason != NoCand;
3577 }
3578
3579 // Keep clustered nodes together to encourage downstream peephole
3580 // optimizations which may reduce resource requirements.
3581 //
3582 // This is a best effort to set things up for a post-RA pass. Optimizations
3583 // like generating loads of multiple registers should ideally be done within
3584 // the scheduler pass by combining the loads during DAG postprocessing.
3585 const SUnit *CandNextClusterSU =
3586 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3587 const SUnit *TryCandNextClusterSU =
3588 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3589 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3590 Cand.SU == CandNextClusterSU,
3591 TryCand, Cand, Cluster))
3592 return TryCand.Reason != NoCand;
3593
3594 if (SameBoundary) {
3595 // Weak edges are for clustering and other constraints.
3596 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3597 getWeakLeft(Cand.SU, Cand.AtTop),
3598 TryCand, Cand, Weak))
3599 return TryCand.Reason != NoCand;
3600 }
3601
3602 // Avoid increasing the max pressure of the entire region.
3603 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3604 Cand.RPDelta.CurrentMax,
3605 TryCand, Cand, RegMax, TRI,
3606 DAG->MF))
3607 return TryCand.Reason != NoCand;
3608
3609 if (SameBoundary) {
3610 // Avoid critical resource consumption and balance the schedule.
3611 TryCand.initResourceDelta(DAG, SchedModel);
3612 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3613 TryCand, Cand, ResourceReduce))
3614 return TryCand.Reason != NoCand;
3615 if (tryGreater(TryCand.ResDelta.DemandedResources,
3616 Cand.ResDelta.DemandedResources,
3617 TryCand, Cand, ResourceDemand))
3618 return TryCand.Reason != NoCand;
3619
3620 // Avoid serializing long latency dependence chains.
3621 // For acyclic path limited loops, latency was already checked above.
3622 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3623 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3624 return TryCand.Reason != NoCand;
3625
3626 // Fall through to original instruction order.
3627 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3628 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3629 TryCand.Reason = NodeOrder;
3630 return true;
3631 }
3632 }
3633
3634 return false;
3635}
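// Illustrative sketch (hypothetical subclass, not part of this file): targets
// that want an extra heuristic typically subclass GenericScheduler and
// override tryCandidate(), delegating to the base implementation first and
// only then applying their own tie-breaker. The class name below is made up
// for illustration.
//
//   class MyTargetSchedStrategy : public GenericScheduler {
//   public:
//     using GenericScheduler::GenericScheduler;
//     bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
//                       SchedBoundary *Zone) const override {
//       if (GenericScheduler::tryCandidate(Cand, TryCand, Zone))
//         return TryCand.Reason != NoCand;
//       // A target-specific tie-breaker would go here, setting
//       // TryCand.Reason and returning true when TryCand should win.
//       return false;
//     }
//   };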
3636
3637/// Pick the best candidate from the queue.
3638///
3639/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3640/// DAG building. To adjust for the current scheduling location we need to
3641/// maintain the number of vreg uses remaining to be top-scheduled.
3642void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3643 const CandPolicy &ZonePolicy,
3644 const RegPressureTracker &RPTracker,
3645 SchedCandidate &Cand) {
3646 // getMaxPressureDelta temporarily modifies the tracker.
3647 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3648
3649 ReadyQueue &Q = Zone.Available;
3650 for (SUnit *SU : Q) {
3651
3652 SchedCandidate TryCand(ZonePolicy);
3653 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3654 // Pass SchedBoundary only when comparing nodes from the same boundary.
3655 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3656 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3657 // Initialize resource delta if needed in case future heuristics query it.
3658 if (TryCand.ResDelta == SchedResourceDelta())
3659 TryCand.initResourceDelta(DAG, SchedModel);
3660 Cand.setBest(TryCand);
3661 LLVM_DEBUG(traceCandidate(Cand));
3662 }
3663 }
3664}
3665
3666/// Pick the best candidate node from either the top or bottom queue.
3667SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3668 // Schedule as far as possible in the direction of no choice. This is most
3669 // efficient, but also provides the best heuristics for CriticalPSets.
3670 if (SUnit *SU = Bot.pickOnlyChoice()) {
3671 IsTopNode = false;
3672 tracePick(Only1, false);
3673 return SU;
3674 }
3675 if (SUnit *SU = Top.pickOnlyChoice()) {
3676 IsTopNode = true;
3677 tracePick(Only1, true);
3678 return SU;
3679 }
3680 // Set the bottom-up policy based on the state of the current bottom zone and
3681 // the instructions outside the zone, including the top zone.
3682 CandPolicy BotPolicy;
3683 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3684 // Set the top-down policy based on the state of the current top zone and
3685 // the instructions outside the zone, including the bottom zone.
3686 CandPolicy TopPolicy;
3687 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3688
3689 // See if BotCand is still valid (because we previously scheduled from Top).
3690 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3691 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3692 BotCand.Policy != BotPolicy) {
3693 BotCand.reset(CandPolicy());
3694 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3695 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3696 } else {
3697 LLVM_DEBUG(traceCandidate(BotCand));
3698#ifndef NDEBUG
3699 if (VerifyScheduling) {
3700 SchedCandidate TCand;
3701 TCand.reset(CandPolicy());
3702 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3703 assert(TCand.SU == BotCand.SU &&
3704 "Last pick result should correspond to re-picking right now");
3705 }
3706#endif
3707 }
3708
3709 // Check if the top Q has a better candidate.
3710 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3711 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3712 TopCand.Policy != TopPolicy) {
3713 TopCand.reset(CandPolicy());
3714 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3715 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3716 } else {
3717 LLVM_DEBUG(traceCandidate(TopCand));
3718#ifndef NDEBUG
3719 if (VerifyScheduling) {
3720 SchedCandidate TCand;
3721 TCand.reset(CandPolicy());
3722 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3723 assert(TCand.SU == TopCand.SU &&
3724 "Last pick result should correspond to re-picking right now");
3725 }
3726#endif
3727 }
3728
3729 // Pick best from BotCand and TopCand.
3730 assert(BotCand.isValid());
3731 assert(TopCand.isValid());
3732 SchedCandidate Cand = BotCand;
3733 TopCand.Reason = NoCand;
3734 if (tryCandidate(Cand, TopCand, nullptr)) {
3735 Cand.setBest(TopCand);
3736 LLVM_DEBUG(traceCandidate(Cand));
3737 }
3738
3739 IsTopNode = Cand.AtTop;
3740 tracePick(Cand);
3741 return Cand.SU;
3742}
3743
3744/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3745SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3746 if (DAG->top() == DAG->bottom()) {
3747 assert(Top.Available.empty() && Top.Pending.empty() &&
3748 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3749 return nullptr;
3750 }
3751 SUnit *SU;
3752 do {
3753 if (RegionPolicy.OnlyTopDown) {
3754 SU = Top.pickOnlyChoice();
3755 if (!SU) {
3756 CandPolicy NoPolicy;
3757 TopCand.reset(NoPolicy);
3758 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3759 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3760 tracePick(TopCand);
3761 SU = TopCand.SU;
3762 }
3763 IsTopNode = true;
3764 } else if (RegionPolicy.OnlyBottomUp) {
3765 SU = Bot.pickOnlyChoice();
3766 if (!SU) {
3767 CandPolicy NoPolicy;
3768 BotCand.reset(NoPolicy);
3769 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3770 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3771 tracePick(BotCand);
3772 SU = BotCand.SU;
3773 }
3774 IsTopNode = false;
3775 } else {
3776 SU = pickNodeBidirectional(IsTopNode);
3777 }
3778 } while (SU->isScheduled);
3779
3780 if (SU->isTopReady())
3781 Top.removeReady(SU);
3782 if (SU->isBottomReady())
3783 Bot.removeReady(SU);
3784
3785 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3786 << *SU->getInstr());
3787 return SU;
3788}
3789
3790void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3791 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3792 if (!isTop)
3793 ++InsertPos;
3794 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3795
3796 // Find already scheduled copies with a single physreg dependence and move
3797 // them just above the scheduled instruction.
3798 for (SDep &Dep : Deps) {
3799 if (Dep.getKind() != SDep::Data ||
3800 !Register::isPhysicalRegister(Dep.getReg()))
3801 continue;
3802 SUnit *DepSU = Dep.getSUnit();
3803 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3804 continue;
3805 MachineInstr *Copy = DepSU->getInstr();
3806 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3807 continue;
3808 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3809 DAG->dumpNode(*Dep.getSUnit()));
3810 DAG->moveInstruction(Copy, InsertPos);
3811 }
3812}
3813
3814/// Update the scheduler's state after scheduling a node. This is the same node
3815/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3816/// update its state based on the current cycle before MachineSchedStrategy
3817/// does.
3818///
3819/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3820/// them here. See comments in biasPhysReg.
3821void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3822 if (IsTopNode) {
3823 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3824 Top.bumpNode(SU);
3825 if (SU->hasPhysRegUses)
3826 reschedulePhysReg(SU, true);
3827 } else {
3828 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3829 Bot.bumpNode(SU);
3830 if (SU->hasPhysRegDefs)
3831 reschedulePhysReg(SU, false);
3832 }
3833}
3834
3835/// Create the standard converging machine scheduler. This will be used as the
3836/// default scheduler if the target does not set a default.
3837ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3838 ScheduleDAGMILive *DAG =
3839 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3840 // Register DAG post-processors.
3841 //
3842 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3843 // data and pass it to later mutations. Have a single mutation that gathers
3844 // the interesting nodes in one pass.
3845 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3846
3847 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3848 // Add MacroFusion mutation if fusions are not empty.
3849 const auto &MacroFusions = STI.getMacroFusions();
3850 if (!MacroFusions.empty())
3851 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3852 return DAG;
3853}
3854
3855static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3856 return createGenericSchedLive(C);
3857}
3858
3859static MachineSchedRegistry
3860GenericSchedRegistry("converge", "Standard converging scheduler.",
3861 createConvergingSched);
3862
3863//===----------------------------------------------------------------------===//
3864// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3865//===----------------------------------------------------------------------===//
3866
3867void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3868 DAG = Dag;
3869 SchedModel = DAG->getSchedModel();
3870 TRI = DAG->TRI;
3871
3872 Rem.init(DAG, SchedModel);
3873 Top.init(DAG, SchedModel, &Rem);
3874 Bot.init(DAG, SchedModel, &Rem);
3875
3876 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3877 // or are disabled, then these HazardRecs will be disabled.
3878 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3879 if (!Top.HazardRec) {
3880 Top.HazardRec =
3881 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3882 Itin, DAG);
3883 }
3884 if (!Bot.HazardRec) {
3885 Bot.HazardRec =
3886 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3887 Itin, DAG);
3888 }
3889}
3890
3891void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3892 MachineBasicBlock::iterator End,
3893 unsigned NumRegionInstrs) {
3894 if (PostRADirection == MISchedPostRASched::TopDown) {
3895 RegionPolicy.OnlyTopDown = true;
3896 RegionPolicy.OnlyBottomUp = false;
3897 } else if (PostRADirection == MISchedPostRASched::BottomUp) {
3898 RegionPolicy.OnlyTopDown = false;
3899 RegionPolicy.OnlyBottomUp = true;
3900 } else if (PostRADirection == MISchedPostRASched::Bidirectional) {
3901 RegionPolicy.OnlyBottomUp = false;
3902 RegionPolicy.OnlyTopDown = false;
3903 }
3904}
3905
3906void PostGenericScheduler::registerRoots() {
3907 Rem.CriticalPath = DAG->ExitSU.getDepth();
3908
3909 // Some roots may not feed into ExitSU. Check all of them in case.
3910 for (const SUnit *SU : Bot.Available) {
3911 if (SU->getDepth() > Rem.CriticalPath)
3912 Rem.CriticalPath = SU->getDepth();
3913 }
3914 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3915 if (DumpCriticalPathLength) {
3916 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3917 }
3918}
3919
3920/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3921///
3922/// \param Cand provides the policy and current best candidate.
3923/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3924/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3925bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3926 SchedCandidate &TryCand) {
3927 // Initialize the candidate if needed.
3928 if (!Cand.isValid()) {
3929 TryCand.Reason = NodeOrder;
3930 return true;
3931 }
3932
3933 // Prioritize instructions that read unbuffered resources by stall cycles.
3934 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3935 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3936 return TryCand.Reason != NoCand;
3937
3938 // Keep clustered nodes together.
3939 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3940 Cand.SU == DAG->getNextClusterSucc(),
3941 TryCand, Cand, Cluster))
3942 return TryCand.Reason != NoCand;
3943
3944 // Avoid critical resource consumption and balance the schedule.
3945 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3946 TryCand, Cand, ResourceReduce))
3947 return TryCand.Reason != NoCand;
3948 if (tryGreater(TryCand.ResDelta.DemandedResources,
3949 Cand.ResDelta.DemandedResources,
3950 TryCand, Cand, ResourceDemand))
3951 return TryCand.Reason != NoCand;
3952
3953 // Avoid serializing long latency dependence chains.
3954 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3955 return TryCand.Reason != NoCand;
3956 }
3957
3958 // Fall through to original instruction order.
3959 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3960 TryCand.Reason = NodeOrder;
3961 return true;
3962 }
3963
3964 return false;
3965}
3966
3967void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3968 SchedCandidate &Cand) {
3969 ReadyQueue &Q = Zone.Available;
3970 for (SUnit *SU : Q) {
3971 SchedCandidate TryCand(Cand.Policy);
3972 TryCand.SU = SU;
3973 TryCand.AtTop = Zone.isTop();
3974 TryCand.initResourceDelta(DAG, SchedModel);
3975 if (tryCandidate(Cand, TryCand)) {
3976 Cand.setBest(TryCand);
3977 LLVM_DEBUG(traceCandidate(Cand));
3978 }
3979 }
3980}
3981
3982/// Pick the best candidate node from either the top or bottom queue.
3983SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3984 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
3985 // out common parts.
3986
3987 // Schedule as far as possible in the direction of no choice. This is most
3988 // efficient, but also provides the best heuristics for CriticalPSets.
3989 if (SUnit *SU = Bot.pickOnlyChoice()) {
3990 IsTopNode = false;
3991 tracePick(Only1, false);
3992 return SU;
3993 }
3994 if (SUnit *SU = Top.pickOnlyChoice()) {
3995 IsTopNode = true;
3996 tracePick(Only1, true);
3997 return SU;
3998 }
3999 // Set the bottom-up policy based on the state of the current bottom zone and
4000 // the instructions outside the zone, including the top zone.
4001 CandPolicy BotPolicy;
4002 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4003 // Set the top-down policy based on the state of the current top zone and
4004 // the instructions outside the zone, including the bottom zone.
4005 CandPolicy TopPolicy;
4006 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4007
4008 // See if BotCand is still valid (because we previously scheduled from Top).
4009 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4010 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4011 BotCand.Policy != BotPolicy) {
4012 BotCand.reset(CandPolicy());
4013 pickNodeFromQueue(Bot, BotCand);
4014 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4015 } else {
4016 LLVM_DEBUG(traceCandidate(BotCand));
4017#ifndef NDEBUG
4018 if (VerifyScheduling) {
4019 SchedCandidate TCand;
4020 TCand.reset(CandPolicy());
4021 pickNodeFromQueue(Bot, BotCand);
4022 assert(TCand.SU == BotCand.SU &&
4023 "Last pick result should correspond to re-picking right now");
4024 }
4025#endif
4026 }
4027
4028 // Check if the top Q has a better candidate.
4029 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4030 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4031 TopCand.Policy != TopPolicy) {
4032 TopCand.reset(CandPolicy());
4033 pickNodeFromQueue(Top, TopCand);
4034 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4035 } else {
4036 LLVM_DEBUG(traceCandidate(TopCand));
4037#ifndef NDEBUG
4038 if (VerifyScheduling) {
4039 SchedCandidate TCand;
4040 TCand.reset(CandPolicy());
4041 pickNodeFromQueue(Top, TopCand);
4042 assert(TCand.SU == TopCand.SU &&
4043 "Last pick result should correspond to re-picking right now");
4044 }
4045#endif
4046 }
4047
4048 // Pick best from BotCand and TopCand.
4049 assert(BotCand.isValid());
4050 assert(TopCand.isValid());
4051 SchedCandidate Cand = BotCand;
4052 TopCand.Reason = NoCand;
4053 if (tryCandidate(Cand, TopCand)) {
4054 Cand.setBest(TopCand);
4055 LLVM_DEBUG(traceCandidate(Cand));
4056 }
4057
4058 IsTopNode = Cand.AtTop;
4059 tracePick(Cand);
4060 return Cand.SU;
4061}
4062
4063/// Pick the next node to schedule.
4064SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4065 if (DAG->top() == DAG->bottom()) {
4066 assert(Top.Available.empty() && Top.Pending.empty() &&
4067 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4068 return nullptr;
4069 }
4070 SUnit *SU;
4071 do {
4072 if (RegionPolicy.OnlyBottomUp) {
4073 SU = Bot.pickOnlyChoice();
4074 if (SU) {
4075 tracePick(Only1, true);
4076 } else {
4077 CandPolicy NoPolicy;
4078 BotCand.reset(NoPolicy);
4079 // Set the bottom-up policy based on the state of the current bottom
4080 // zone and the instructions outside the zone, including the top zone.
4081 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4082 pickNodeFromQueue(Bot, BotCand);
4083 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4084 tracePick(BotCand);
4085 SU = BotCand.SU;
4086 }
4087 IsTopNode = false;
4088 } else if (RegionPolicy.OnlyTopDown) {
4089 SU = Top.pickOnlyChoice();
4090 if (SU) {
4091 tracePick(Only1, true);
4092 } else {
4093 CandPolicy NoPolicy;
4094 TopCand.reset(NoPolicy);
4095 // Set the top-down policy based on the state of the current top zone
4096 // and the instructions outside the zone, including the bottom zone.
4097 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4098 pickNodeFromQueue(Top, TopCand);
4099 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4100 tracePick(TopCand);
4101 SU = TopCand.SU;
4102 }
4103 IsTopNode = true;
4104 } else {
4105 SU = pickNodeBidirectional(IsTopNode);
4106 }
4107 } while (SU->isScheduled);
4108
4109 if (SU->isTopReady())
4110 Top.removeReady(SU);
4111 if (SU->isBottomReady())
4112 Bot.removeReady(SU);
4113
4114 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4115 << *SU->getInstr());
4116 return SU;
4117}
4118
4119/// Called after ScheduleDAGMI has scheduled an instruction and updated
4120/// scheduled/remaining flags in the DAG nodes.
4121void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4122 if (IsTopNode) {
4123 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4124 Top.bumpNode(SU);
4125 } else {
4126 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4127 Bot.bumpNode(SU);
4128 }
4129}
4130
4131ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4132 ScheduleDAGMI *DAG =
4133 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4134 /*RemoveKillFlags=*/true);
4135 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4136 // Add MacroFusion mutation if fusions are not empty.
4137 const auto &MacroFusions = STI.getMacroFusions();
4138 if (!MacroFusions.empty())
4139 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4140 return DAG;
4141}
4142
4143//===----------------------------------------------------------------------===//
4144// ILP Scheduler. Currently for experimental analysis of heuristics.
4145//===----------------------------------------------------------------------===//
4146
4147namespace {
4148
4149/// Order nodes by the ILP metric.
4150struct ILPOrder {
4151 const SchedDFSResult *DFSResult = nullptr;
4152 const BitVector *ScheduledTrees = nullptr;
4153 bool MaximizeILP;
4154
4155 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4156
4157 /// Apply a less-than relation on node priority.
4158 ///
4159 /// (Return true if A comes after B in the Q.)
4160 bool operator()(const SUnit *A, const SUnit *B) const {
4161 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4162 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4163 if (SchedTreeA != SchedTreeB) {
4164 // Unscheduled trees have lower priority.
4165 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4166 return ScheduledTrees->test(SchedTreeB);
4167
4168 // Trees with shallower connections have lower priority.
4169 if (DFSResult->getSubtreeLevel(SchedTreeA)
4170 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4171 return DFSResult->getSubtreeLevel(SchedTreeA)
4172 < DFSResult->getSubtreeLevel(SchedTreeB);
4173 }
4174 }
4175 if (MaximizeILP)
4176 return DFSResult->getILP(A) < DFSResult->getILP(B);
4177 else
4178 return DFSResult->getILP(A) > DFSResult->getILP(B);
4179 }
4180};
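// Illustrative example (a minimal sketch, not from the LLVM tree): for two
// nodes in different DFS subtrees, the comparator above first prefers nodes
// whose subtree has already started scheduling, then nodes in the subtree
// with the deeper connection level; only when the nodes share a subtree, or
// tie on both of those checks, does the ILP value itself decide, maximized
// or minimized depending on the MaximizeILP flag.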
4181
4182/// Schedule based on the ILP metric.
4183class ILPScheduler : public MachineSchedStrategy {
4184 ScheduleDAGMILive *DAG = nullptr;
4185 ILPOrder Cmp;
4186
4187 std::vector<SUnit*> ReadyQ;
4188
4189public:
4190 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4191
4192 void initialize(ScheduleDAGMI *dag) override {
4193 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4194 DAG = static_cast<ScheduleDAGMILive*>(dag);
4195 DAG->computeDFSResult();
4196 Cmp.DFSResult = DAG->getDFSResult();
4197 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4198 ReadyQ.clear();
4199 }
4200
4201 void registerRoots() override {
4202 // Restore the heap in ReadyQ with the updated DFS results.
4203 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4204 }
4205
4206 /// Implement MachineSchedStrategy interface.
4207 /// -----------------------------------------
4208
4209 /// Callback to select the highest priority node from the ready Q.
4210 SUnit *pickNode(bool &IsTopNode) override {
4211 if (ReadyQ.empty()) return nullptr;
4212 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4213 SUnit *SU = ReadyQ.back();
4214 ReadyQ.pop_back();
4215 IsTopNode = false;
4216 LLVM_DEBUG(dbgs() << "Pick node "
4217 << "SU(" << SU->NodeNum << ") "
4218 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4219 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4220 << " @"
4221 << DAG->getDFSResult()->getSubtreeLevel(
4222 DAG->getDFSResult()->getSubtreeID(SU))
4223 << '\n'
4224 << "Scheduling " << *SU->getInstr());
4225 return SU;
4226 }
4227
4228 /// Scheduler callback to notify that a new subtree is scheduled.
4229 void scheduleTree(unsigned SubtreeID) override {
4230 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4231 }
4232
4233 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4234 /// DFSResults, and resort the priority Q.
4235 void schedNode(SUnit *SU, bool IsTopNode) override {
4236 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4237 }
4238
4239 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4240
4241 void releaseBottomNode(SUnit *SU) override {
4242 ReadyQ.push_back(SU);
4243 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4244 }
4245};
4246
4247} // end anonymous namespace
4248
4249static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4250 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4251}
4252static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4253 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4254}
4255
4257 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4259 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4260
4261//===----------------------------------------------------------------------===//
4262// Machine Instruction Shuffler for Correctness Testing
4263//===----------------------------------------------------------------------===//
4264
4265#ifndef NDEBUG
4266namespace {
4267
4268/// Apply a less-than relation on the node order, which corresponds to the
4269/// instruction order prior to scheduling. IsReverse implements greater-than.
4270template<bool IsReverse>
4271struct SUnitOrder {
4272 bool operator()(SUnit *A, SUnit *B) const {
4273 if (IsReverse)
4274 return A->NodeNum > B->NodeNum;
4275 else
4276 return A->NodeNum < B->NodeNum;
4277 }
4278};
4279
4280/// Reorder instructions as much as possible.
4281class InstructionShuffler : public MachineSchedStrategy {
4282 bool IsAlternating;
4283 bool IsTopDown;
4284
4285 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4286 // gives nodes with a higher number higher priority causing the latest
4287 // instructions to be scheduled first.
4288 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4289 TopQ;
4290
4291 // When scheduling bottom-up, use greater-than as the queue priority.
4292 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4293 BottomQ;
4294
4295public:
4296 InstructionShuffler(bool alternate, bool topdown)
4297 : IsAlternating(alternate), IsTopDown(topdown) {}
4298
4299 void initialize(ScheduleDAGMI*) override {
4300 TopQ.clear();
4301 BottomQ.clear();
4302 }
4303
4304 /// Implement MachineSchedStrategy interface.
4305 /// -----------------------------------------
4306
4307 SUnit *pickNode(bool &IsTopNode) override {
4308 SUnit *SU;
4309 if (IsTopDown) {
4310 do {
4311 if (TopQ.empty()) return nullptr;
4312 SU = TopQ.top();
4313 TopQ.pop();
4314 } while (SU->isScheduled);
4315 IsTopNode = true;
4316 } else {
4317 do {
4318 if (BottomQ.empty()) return nullptr;
4319 SU = BottomQ.top();
4320 BottomQ.pop();
4321 } while (SU->isScheduled);
4322 IsTopNode = false;
4323 }
4324 if (IsAlternating)
4325 IsTopDown = !IsTopDown;
4326 return SU;
4327 }
4328
4329 void schedNode(SUnit *SU, bool IsTopNode) override {}
4330
4331 void releaseTopNode(SUnit *SU) override {
4332 TopQ.push(SU);
4333 }
4334 void releaseBottomNode(SUnit *SU) override {
4335 BottomQ.push(SU);
4336 }
4337};
4338
4339} // end anonymous namespace
4340
4341static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4342 bool Alternate = !ForceTopDown && !ForceBottomUp;
4343 bool TopDown = !ForceBottomUp;
4344 assert((TopDown || !ForceTopDown) &&
4345 "-misched-topdown incompatible with -misched-bottomup");
4346 return new ScheduleDAGMILive(
4347 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4348}
4349
4351 "shuffle", "Shuffle machine instructions alternating directions",
4353#endif // !NDEBUG
4354
4355//===----------------------------------------------------------------------===//
4356// GraphWriter support for ScheduleDAGMILive.
4357//===----------------------------------------------------------------------===//
4358
4359#ifndef NDEBUG
4360namespace llvm {
4361
4362template<> struct GraphTraits<
4363 ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
4364
4365template<>
4366struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
4367 DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4368
4369 static std::string getGraphName(const ScheduleDAG *G) {
4370 return std::string(G->MF.getName());
4371 }
4372
4373 static bool renderGraphFromBottomUp() {
4374 return true;
4375 }
4376
4377 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4378 if (ViewMISchedCutoff == 0)
4379 return false;
4380 return (Node->Preds.size() > ViewMISchedCutoff
4381 || Node->Succs.size() > ViewMISchedCutoff);
4382 }
4383
4384 /// If you want to override the dot attributes printed for a particular
4385 /// edge, override this method.
4386 static std::string getEdgeAttributes(const SUnit *Node,
4387 SUnitIterator EI,
4388 const ScheduleDAG *Graph) {
4389 if (EI.isArtificialDep())
4390 return "color=cyan,style=dashed";
4391 if (EI.isCtrlDep())
4392 return "color=blue,style=dashed";
4393 return "";
4394 }
4395
4396 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4397 std::string Str;
4398 raw_string_ostream SS(Str);
4399 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4400 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4401 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4402 SS << "SU:" << SU->NodeNum;
4403 if (DFS)
4404 SS << " I:" << DFS->getNumInstrs(SU);
4405 return SS.str();
4406 }
4407
4408 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4409 return G->getGraphNodeLabel(SU);
4410 }
4411
4412 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4413 std::string Str("shape=Mrecord");
4414 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4415 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4416 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4417 if (DFS) {
4418 Str += ",style=filled,fillcolor=\"#";
4419 Str += DOT::getColorString(DFS->getSubtreeID(N));
4420 Str += '"';
4421 }
4422 return Str;
4423 }
4424};
4425
4426} // end namespace llvm
4427#endif // NDEBUG
4428
4429/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4430/// rendered using 'dot'.
4431void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4432#ifndef NDEBUG
4433 ViewGraph(this, Name, false, Title);
4434#else
4435 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4436 << "systems with Graphviz or gv!\n";
4437#endif // NDEBUG
4438}
4439
4440/// Out-of-line implementation with no arguments is handy for gdb.
4441void ScheduleDAGMI::viewGraph() {
4442 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4443}
4444
4445/// Sort predicate for the intervals stored in an instance of
4446/// ResourceSegments. Intervals are always disjoint (no intersection
4447/// for any pairs of intervals), therefore we can sort the totality of
4448/// the intervals by looking only at the left boundary.
4449static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4450 const ResourceSegments::IntervalTy &B) {
4451 return A.first < B.first;
4452}
4453
4454unsigned ResourceSegments::getFirstAvailableAt(
4455 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4456 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4457 IntervalBuilder) const {
4458 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4459 sortIntervals) &&
4460 "Cannot execute on an un-sorted set of intervals.");
4461
4462 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4463 // a ResourceSegment interval for that situation.
4464 if (AcquireAtCycle == ReleaseAtCycle)
4465 return CurrCycle;
4466
4467 unsigned RetCycle = CurrCycle;
4468 ResourceSegments::IntervalTy NewInterval =
4469 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4470 for (auto &Interval : _Intervals) {
4471 if (!intersects(NewInterval, Interval))
4472 continue;
4473
4474 // Move the interval right next to the top of the one it
4475 // intersects.
4476 assert(Interval.second > NewInterval.first &&
4477 "Invalid intervals configuration.");
4478 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4479 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4480 }
4481 return RetCycle;
4482}
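// Worked example (a minimal sketch, not from the LLVM tree): assume the
// booked intervals are { [2, 5) } and the IntervalBuilder maps a candidate
// cycle c to [c, c + 3). Starting from CurrCycle == 3, the candidate
// interval [3, 6) overlaps [2, 5), so the cycle is pushed right by
// 5 - 3 == 2 to RetCycle == 5; the rebuilt interval [5, 8) no longer
// overlaps anything, so 5 is returned as the first cycle at which the
// resource can be booked.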
4483
4484void ResourceSegments::add(ResourceSegments::IntervalTy A,
4485 const unsigned CutOff) {
4486 assert(A.first <= A.second && "Cannot add negative resource usage");
4487 assert(CutOff > 0 && "0-size interval history has no use.");
4488 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4489 // instruction needed the resource to be available but does not use it.
4490 // However, ResourceSegment represents an interval that is closed on the left
4491 // and open on the right. It is impossible to represent an empty interval when
4492 // the left is closed. Do not add it to Intervals.
4493 if (A.first == A.second)
4494 return;
4495
4496 assert(all_of(_Intervals,
4497 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4498 return !intersects(A, Interval);
4499 }) &&
4500 "A resource is being overwritten");
4501 _Intervals.push_back(A);
4502
4503 sortAndMerge();
4504
4505 // Do not keep the full history of the intervals, just the
4506 // latest #CutOff.
4507 while (_Intervals.size() > CutOff)
4508 _Intervals.pop_front();
4509}
4510
4511bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4512 ResourceSegments::IntervalTy B) {
4513 assert(A.first <= A.second && "Invalid interval");
4514 assert(B.first <= B.second && "Invalid interval");
4515
4516 // Share one boundary.
4517 if ((A.first == B.first) || (A.second == B.second))
4518 return true;
4519
4520 // full intersect: [ *** ) B
4521 // [***) A
4522 if ((A.first > B.first) && (A.second < B.second))
4523 return true;
4524
4525 // right intersect: [ ***) B
4526 // [*** ) A
4527 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4528 return true;
4529
4530 // left intersect: [*** ) B
4531 // [ ***) A
4532 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4533 return true;
4534
4535 return false;
4536}
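// Illustrative examples (a minimal sketch, not from the LLVM tree), recalling
// that intervals are closed on the left and open on the right:
// intersects([2,5), [4,7)) is true (left overlap), intersects([4,7), [2,5))
// is true (right overlap), intersects([3,4), [2,6)) is true (containment),
// and intersects([2,5), [5,8)) is false, since the shared endpoint 5 belongs
// only to the second interval.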
4537
4538void ResourceSegments::sortAndMerge() {
4539 if (_Intervals.size() <= 1)
4540 return;
4541
4542 // First sort the collection.
4543 _Intervals.sort(sortIntervals);
4544
4545 // We can use std::next because the list has at least 2 elements.
4546 auto next = std::next(std::begin(_Intervals));
4547 auto E = std::end(_Intervals);
4548 for (; next != E; ++next) {
4549 if (std::prev(next)->second >= next->first) {
4550 next->first = std::prev(next)->first;
4551 _Intervals.erase(std::prev(next));
4552 continue;
4553 }
4554 }
4555}
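// Worked example (a minimal sketch, not from the LLVM tree): starting from
// { [2,5), [1,3), [6,7) }, the sort yields { [1,3), [2,5), [6,7) }; since
// 3 >= 2 the first two entries are merged into [1,5), giving
// { [1,5), [6,7) }, and [6,7) is left alone because 5 < 6.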
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
#define P(N)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:36
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
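A short sketch of how one instruction's register effects are typically gathered with RegisterOperands; MI, TRI, MRI and LIS are assumed to come from the surrounding pass.

RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, MRI, /*TrackLaneMasks=*/false, /*IgnoreDead=*/false);
RegOpers.detectDeadDefs(*MI, *LIS);   // move unmarked dead defs into DeadDefs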
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These functions return the interval used by a resource in bottom-up and top-down scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
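A tiny illustration of the half-open interval convention used by ResourceSegments; the concrete cycle numbers are made up.

ResourceSegments::IntervalTy A{3, 7}, B{6, 9};
bool Overlap = ResourceSegments::intersects(A, B);  // true: cycle 6 lies in [3, 7)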
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
bool isWeak() const
Tests if this is a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
bool isArtificialDep() const
Definition: ScheduleDAG.h:668
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:665
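A hedged sketch using the SDep accessors above to test whether a node carries an incoming cluster edge, i.e. whether it wants to stay adjacent to a predecessor.

static bool hasClusterPred(const SUnit *SU) {
  for (const SDep &Pred : SU->Preds)
    if (Pred.isCluster())             // weak edge produced by load/store clustering
      return true;
  return false;
}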
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:270
bool isBottomReady() const
Definition: ScheduleDAG.h:449
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
bool isTopReady() const
Definition: ScheduleDAG.h:446
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
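A hedged sketch combining Preds, edge kinds and latencies; it assumes <algorithm> is available for std::max and is not the scheduler's own critical-path computation.

static unsigned maxDataInputDepth(const SUnit *SU) {
  unsigned Max = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isWeak() || Pred.getKind() != SDep::Data)
      continue;                       // ignore weak and non-data edges
    Max = std::max(Max, Pred.getSUnit()->getDepth() + Pred.getLatency());
  }
  return Max;
}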
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group?
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
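A hedged sketch of how a strategy typically drives a top-down SchedBoundary named Top; a real strategy scores the Available queue instead of taking its front element, and Available is assumed non-empty here.

SUnit *SU = Top.pickOnlyChoice();     // forced, hazard-free choice if any
if (!SU)
  SU = *Top.Available.begin();        // placeholder for a real heuristic pick
Top.removeReady(SU);                  // drop SU from the ready (or pending) set
Top.bumpNode(SU);                     // move the scheduled-code boundary past SU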
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
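A hedged sketch of direct SchedDFSResult use; in practice ScheduleDAGMILive::computeDFSResult() performs these steps, and the bottom-up flag and subtree limit here are illustrative.

SchedDFSResult DFS(/*IsBottomUp=*/true, /*SubtreeLimit=*/8);
DFS.resize(DAG->SUnits.size());
DFS.compute(DAG->SUnits);
ILPValue RootILP = DFS.getILP(&DAG->SUnits.front());
unsigned NumTrees = DFS.getNumSubtrees();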
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
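A hedged sketch of a ScheduleDAGMutation that exercises canAddEdge/addEdge from the interface above; OrderFirstLastMutation is an illustrative name, and a real mutation would pick its SUnits by inspecting the instructions.

struct OrderFirstLastMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    SUnit *First = &DAG->SUnits.front();
    SUnit *Last = &DAG->SUnits.back();
    if (DAG->canAddEdge(Last, First))   // refuse edges that would form a cycle
      DAG->addEdge(Last, SDep(First, SDep::Artificial));
  }
};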
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
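A hedged sketch of the common pattern for instantiating this scheduler from a target hook; createMySched and MyStrategy are placeholders, while addMutation and createLoadClusterDAGMutation are the documented entry points.

static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, std::make_unique<MyStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}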
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:563
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
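A hedged sketch of the smallest possible recognizer built on the callbacks above; it models no pipeline state and simply never reports a hazard.

struct NullHazardRecognizer : ScheduleHazardRecognizer {
  HazardType getHazardType(SUnit *, int Stalls) override { return NoHazard; }
  void Reset() override {}
  void EmitInstruction(SUnit *) override {}
  void AdvanceCycle() override {}
  void RecedeCycle() override {}
};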
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:179
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240
SlotIndexes pass.
Definition: SlotIndexes.h:300
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:267
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
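A hedged sketch of walking the per-resource writes of one instruction's scheduling class; SchedModel and MI are assumed to come from the surrounding scheduler.

if (SchedModel->hasInstrSchedModel()) {
  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(&MI);
  for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC),
                                     PE = SchedModel->getWriteProcResEnd(SC);
       PI != PE; ++PI)
    dbgs() << SchedModel->getResourceName(PI->ProcResourceIdx)
           << " held for " << PI->ReleaseAtCycle << " cycles\n";
}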
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
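A hedged sketch of a target overriding the scheduling policy hook; MyTargetSubtarget is an illustrative subclass, and the region-size threshold is arbitrary.

void MyTargetSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                            unsigned NumRegionInstrs) const {
  Policy.ShouldTrackPressure = true;  // always track pressure in this example
  if (NumRegionInstrs > 128)
    Policy.OnlyTopDown = true;        // keep very large regions one-directional
}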
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:90
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1316
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
void initializePostMachineSchedulerPass(PassRegistry &)
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
cl::opt< bool > ViewMISchedDAGs
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
cl::opt< MISchedPostRASched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISchedPostRASched::TopDown), cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
cl::opt< bool > ForceBottomUp
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineSchedulerPass(PassRegistry &)
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
cl::opt< bool > ForceTopDown
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
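A hedged sketch of how a GenericScheduler subclass (MyStrategy, illustrative) might compose biasPhysReg and tryGreater inside tryCandidate; only the physreg step mirrors the stock heuristic, and the NodeNum fallback is deliberately simplified.

bool MyStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                              SchedBoundary *Zone) const {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;       // first viable candidate wins by default
    return true;
  }
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;  // prefer shorter physreg live ranges
  if (tryGreater(TryCand.SU->NodeNum, Cand.SU->NodeNum,
                 TryCand, Cand, NodeOrder))
    return TryCand.Reason != NoCand;  // otherwise keep original node order
  return false;
}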
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.