MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
51#include "llvm/Config/llvm-config.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
57#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
81 cl::desc("Force top-down list scheduling"));
83 cl::desc("Force bottom-up list scheduling"));
84namespace MISchedPostRASched {
89};
90} // end namespace MISchedPostRASched
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
95 // expect that behavior by default.
99 "Force top-down post reg-alloc list scheduling"),
101 "Force bottom-up post reg-alloc list scheduling"),
103 "Force bidirectional post reg-alloc list scheduling")));
106 cl::desc("Print critical path length to stdout"));
107
109 "verify-misched", cl::Hidden,
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
114 "view-misched-dags", cl::Hidden,
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
116cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117 cl::desc("Print schedule DAGs"));
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
120 cl::desc("Dump resource usage at schedule boundary."));
122 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
123 cl::desc("Show details of invoking getNextResourceCycle."));
124#else
125const bool ViewMISchedDAGs = false;
126const bool PrintDAGs = false;
127const bool MischedDetailResourceBooking = false;
128#ifdef LLVM_ENABLE_DUMP
129const bool MISchedDumpReservedCycles = false;
130#endif // LLVM_ENABLE_DUMP
131#endif // NDEBUG
132
133} // end namespace llvm
134
135#ifndef NDEBUG
136/// In some situations a few uninteresting nodes depend on nearly all other
137/// nodes in the graph; provide a cutoff to hide them.
138static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
139 cl::desc("Hide nodes with more predecessors/successors than cutoff"));
140
142 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
143
145 cl::desc("Only schedule this function"));
146static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147 cl::desc("Only schedule this MBB#"));
148#endif // NDEBUG
149
150/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151/// size of the ready lists.
153 cl::desc("Limit ready list to N instructions"), cl::init(256));
154
155static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156 cl::desc("Enable register pressure scheduling."), cl::init(true));
157
158static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
160
162 cl::desc("Enable memop clustering."),
163 cl::init(true));
164static cl::opt<bool>
165 ForceFastCluster("force-fast-cluster", cl::Hidden,
166 cl::desc("Switch to fast cluster algorithm with the loss "
167 "of some fusion opportunities"),
168 cl::init(false));
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171 cl::desc("The threshold for fast cluster"),
172 cl::init(1000));
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
177 cl::desc("Dump resource usage at schedule boundary."));
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
182 cl::init(19));
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185 cl::desc("Set width of the columns showing resource booking."),
186 cl::init(5));
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194 cl::desc("Number of intervals to track"), cl::init(10));
195
196// DAG subtrees must have at least this many nodes.
197static const unsigned MinSubtreeSize = 8;
198
199// Pin the vtables to this file.
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204//===----------------------------------------------------------------------===//
205// Machine Instruction Scheduling Pass and Registry
206//===----------------------------------------------------------------------===//
207
210}
211
213 delete RegClassInfo;
214}
215
216namespace {
217
218/// Base class for a machine scheduler that can run at any point.
219class MachineSchedulerBase : public MachineSchedContext,
220 public MachineFunctionPass {
221public:
222 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
223
224 void print(raw_ostream &O, const Module* = nullptr) const override;
225
226protected:
227 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228};
229
230/// MachineScheduler runs after coalescing and before register allocation.
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237 bool runOnMachineFunction(MachineFunction&) override;
238
239 static char ID; // Class identification, replacement for typeinfo
240
241protected:
242 ScheduleDAGInstrs *createMachineScheduler();
243};
244
245/// PostMachineScheduler runs shortly before code emission.
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252 bool runOnMachineFunction(MachineFunction&) override;
253
254 static char ID; // Class identification, replacement for typeinfo
255
256protected:
257 ScheduleDAGInstrs *createPostMachineScheduler();
258};
259
260} // end anonymous namespace
261
262char MachineScheduler::ID = 0;
263
264char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
267 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281 AU.setPreservesCFG();
291}
292
293char PostMachineScheduler::ID = 0;
294
295char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298 "PostRA Machine Instruction Scheduler", false, false)
302INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310 AU.setPreservesCFG();
316}
317
320
321/// A dummy default scheduler factory indicates whether the scheduler
322/// is overridden on the command line.
324 return nullptr;
325}
326
327/// MachineSchedOpt allows command line selection of the scheduler.
332 cl::desc("Machine instruction scheduler to use"));
333
335DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
337
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
341 cl::Hidden);
342
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
346 cl::init(true), cl::Hidden);
347
348/// Decrement this iterator until reaching the top or a non-debug instr.
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
359
360/// Non-const version.
366}
367
368/// If this iterator is a debug value, increment until reaching the End or a
369/// non-debug instruction.
373 for(; I != End; ++I) {
374 if (!I->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380/// Non-const version.
386}
387
388/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390 // Select the scheduler, or set the default.
392 if (Ctor != useDefaultMachineSched)
393 return Ctor(this);
394
395 // Get the default scheduler set by the target for this function.
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
397 if (Scheduler)
398 return Scheduler;
399
400 // Default to GenericScheduler.
401 return createGenericSchedLive(this);
402}
403
404/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405/// the caller. We don't have a command line option to override the postRA
406/// scheduler. The Target must configure it.
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408 // Get the postRA scheduler set by the target for this function.
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
410 if (Scheduler)
411 return Scheduler;
412
413 // Default to GenericScheduler.
414 return createGenericSchedPostRA(this);
415}
416
417/// Top-level MachineScheduler pass driver.
418///
419/// Visit blocks in function order. Divide each block into scheduling regions
420/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421/// consistent with the DAG builder, which traverses the interior of the
422/// scheduling regions bottom-up.
423///
424/// This design avoids exposing scheduling boundaries to the DAG builder,
425/// simplifying the DAG builder's support for "special" target instructions.
426/// At the same time the design allows target schedulers to operate across
427/// scheduling boundaries, for example to bundle the boundary instructions
428/// without reordering them. This creates complexity, because the target
429/// scheduler must update the RegionBegin and RegionEnd positions cached by
430/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431/// design would be to split blocks at scheduling boundaries, but LLVM has a
432/// general bias against block splitting purely for implementation simplicity.
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434 if (skipFunction(mf.getFunction()))
435 return false;
436
437 if (EnableMachineSched.getNumOccurrences()) {
439 return false;
440 } else if (!mf.getSubtarget().enableMachineScheduler())
441 return false;
442
443 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445 // Initialize the context of the pass.
446 MF = &mf;
447 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
448 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
449 PassConfig = &getAnalysis<TargetPassConfig>();
450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
453
454 if (VerifyScheduling) {
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(this, "Before machine scheduling.");
457 }
458 RegClassInfo->runOnMachineFunction(*MF);
459
460 // Instantiate the selected scheduler for this target, function, and
461 // optimization level.
462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
464 if (ForceTopDown)
466 else if (ForceBottomUp)
468 else
470 Scheduler->setDumpDirection(D);
471 scheduleRegions(*Scheduler, false);
472
473 LLVM_DEBUG(LIS->dump());
475 MF->verify(this, "After machine scheduling.");
476 return true;
477}
478
479bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
480 if (skipFunction(mf.getFunction()))
481 return false;
482
483 if (EnablePostRAMachineSched.getNumOccurrences()) {
485 return false;
486 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
488 return false;
489 }
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
491
492 // Initialize the context of the pass.
493 MF = &mf;
494 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
495 PassConfig = &getAnalysis<TargetPassConfig>();
496 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
497
499 MF->verify(this, "Before post machine scheduling.");
500
501 // Instantiate the selected scheduler for this target, function, and
502 // optimization level.
503 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
509 else
511 Scheduler->setDumpDirection(D);
512 scheduleRegions(*Scheduler, true);
513
515 MF->verify(this, "After post machine scheduling.");
516 return true;
517}
518
519/// Return true if the given instruction should not be included in a scheduling
520/// region.
521///
522/// MachineScheduler does not currently support scheduling across calls. To
523/// handle calls, the DAG builder needs to be modified to create register
524/// anti/output dependencies on the registers clobbered by the call's regmask
525/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
526/// scheduling across calls. In PostRA scheduling, we need the isCall check to
527/// enforce the boundary, but there would be no benefit to postRA scheduling
528/// across calls this late anyway.
531 MachineFunction *MF,
532 const TargetInstrInfo *TII) {
533 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
534}
535
536/// A region of an MBB for scheduling.
537namespace {
538struct SchedRegion {
539 /// RegionBegin is the first instruction in the scheduling region, and
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
541 /// last instruction in the scheduling region. These iterators cannot refer
542 /// to instructions outside of the identified scheduling region because
543 /// those may be reordered before scheduling this region.
544 MachineBasicBlock::iterator RegionBegin;
546 unsigned NumRegionInstrs;
547
549 unsigned N) :
550 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
551};
552} // end anonymous namespace
553
555
556static void
558 MBBRegionsVector &Regions,
559 bool RegionsTopDown) {
562
564 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565 RegionEnd != MBB->begin(); RegionEnd = I) {
566
567 // Avoid decrementing RegionEnd for blocks with no terminator.
568 if (RegionEnd != MBB->end() ||
569 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
570 --RegionEnd;
571 }
572
573 // The next region starts above the previous region. Look backward in the
574 // instruction stream until we find the nearest boundary.
575 unsigned NumRegionInstrs = 0;
576 I = RegionEnd;
577 for (;I != MBB->begin(); --I) {
578 MachineInstr &MI = *std::prev(I);
579 if (isSchedBoundary(&MI, &*MBB, MF, TII))
580 break;
581 if (!MI.isDebugOrPseudoInstr()) {
582 // MBB::size() uses instr_iterator to count. Here we need a bundle to
583 // count as a single instruction.
584 ++NumRegionInstrs;
585 }
586 }
587
588 // It's possible we found a scheduling region that only has debug
589 // instructions. Don't bother scheduling these.
590 if (NumRegionInstrs != 0)
591 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
592 }
593
594 if (RegionsTopDown)
595 std::reverse(Regions.begin(), Regions.end());
596}
597
598/// Main driver for both MachineScheduler and PostMachineScheduler.
599void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
600 bool FixKillFlags) {
601 // Visit all machine basic blocks.
602 //
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up
604 // loop tree. Then we can optionally compute global RegPressure.
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
606 MBB != MBBEnd; ++MBB) {
607
608 Scheduler.startBlock(&*MBB);
609
610#ifndef NDEBUG
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
612 continue;
613 if (SchedOnlyBlock.getNumOccurrences()
614 && (int)SchedOnlyBlock != MBB->getNumber())
615 continue;
616#endif
617
618 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
619 // points to the scheduling boundary at the bottom of the region. The DAG
620 // does not include RegionEnd, but the region does (i.e. the next
621 // RegionEnd is above the previous RegionBegin). If the current block has
622 // no terminator then RegionEnd == MBB->end() for the bottom region.
623 //
624 // All the regions of MBB are first found and stored in MBBRegions, which
625 // are then processed top-down within the MBB when top-down region order is requested.
626 //
627 // The Scheduler may insert instructions during either schedule() or
628 // exitRegion(), even for empty regions. So the local iterators 'I' and
629 // 'RegionEnd' are invalid across these calls. Instructions must not be
630 // added to other regions than the current one without updating MBBRegions.
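    //
    // As an illustrative sketch (not part of the upstream comment), a block
    // such as
    //
    //   %a = ...
    //   %b = ...
    //   call @foo      <- isSchedBoundary() returns true for calls
    //   %c = ...
    //   ret
    //
    // would be split into two regions, roughly [%a, call) and [%c, ret),
    // each of which is entered, scheduled, and exited independently below.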
631
632 MBBRegionsVector MBBRegions;
633 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
634 for (const SchedRegion &R : MBBRegions) {
635 MachineBasicBlock::iterator I = R.RegionBegin;
636 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
637 unsigned NumRegionInstrs = R.NumRegionInstrs;
638
639 // Notify the scheduler of the region, even if we may skip scheduling
640 // it. Perhaps it still needs to be bundled.
641 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
642
643 // Skip empty scheduling regions (0 or 1 schedulable instructions).
644 if (I == RegionEnd || I == std::prev(RegionEnd)) {
645 // Close the current region. Bundle the terminator if needed.
646 // This invalidates 'RegionEnd' and 'I'.
647 Scheduler.exitRegion();
648 continue;
649 }
650 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652 << " " << MBB->getName() << "\n From: " << *I
653 << " To: ";
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
655 else dbgs() << "End\n";
656 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
658 errs() << MF->getName();
659 errs() << ":%bb. " << MBB->getNumber();
660 errs() << " " << MBB->getName() << " \n";
661 }
662
663 // Schedule a region: possibly reorder instructions.
664 // This invalidates the original region iterators.
665 Scheduler.schedule();
666
667 // Close the current region.
668 Scheduler.exitRegion();
669 }
670 Scheduler.finishBlock();
671 // FIXME: Ideally, no further passes should rely on kill flags. However,
672 // thumb2 size reduction is currently an exception, so the PostMIScheduler
673 // needs to do this.
674 if (FixKillFlags)
675 Scheduler.fixupKills(*MBB);
676 }
677 Scheduler.finalizeSchedule();
678}
679
680void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
681 // unimplemented
682}
683
684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686 dbgs() << "Queue " << Name << ": ";
687 for (const SUnit *SU : Queue)
688 dbgs() << SU->NodeNum << " ";
689 dbgs() << "\n";
690}
691#endif
692
693//===----------------------------------------------------------------------===//
694// ScheduleDAGMI - Basic machine instruction scheduling. This is
695// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
696// virtual registers.
697//===----------------------------------------------------------------------===//
698
699// Provide a vtable anchor.
701
702/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
703/// NumPredsLeft reaches zero, release the successor node.
704///
705/// FIXME: Adjust SuccSU height based on MinLatency.
707 SUnit *SuccSU = SuccEdge->getSUnit();
708
709 if (SuccEdge->isWeak()) {
710 --SuccSU->WeakPredsLeft;
711 if (SuccEdge->isCluster())
712 NextClusterSucc = SuccSU;
713 return;
714 }
715#ifndef NDEBUG
716 if (SuccSU->NumPredsLeft == 0) {
717 dbgs() << "*** Scheduling failed! ***\n";
718 dumpNode(*SuccSU);
719 dbgs() << " has been released too many times!\n";
720 llvm_unreachable(nullptr);
721 }
722#endif
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
724 // CurrCycle may have advanced since then.
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
727
728 --SuccSU->NumPredsLeft;
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
730 SchedImpl->releaseTopNode(SuccSU);
731}
732
733/// releaseSuccessors - Call releaseSucc on each of SU's successors.
735 for (SDep &Succ : SU->Succs)
736 releaseSucc(SU, &Succ);
737}
738
739/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
740/// NumSuccsLeft reaches zero, release the predecessor node.
741///
742/// FIXME: Adjust PredSU height based on MinLatency.
744 SUnit *PredSU = PredEdge->getSUnit();
745
746 if (PredEdge->isWeak()) {
747 --PredSU->WeakSuccsLeft;
748 if (PredEdge->isCluster())
749 NextClusterPred = PredSU;
750 return;
751 }
752#ifndef NDEBUG
753 if (PredSU->NumSuccsLeft == 0) {
754 dbgs() << "*** Scheduling failed! ***\n";
755 dumpNode(*PredSU);
756 dbgs() << " has been released too many times!\n";
757 llvm_unreachable(nullptr);
758 }
759#endif
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
761 // CurrCycle may have advanced since then.
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
764
765 --PredSU->NumSuccsLeft;
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
767 SchedImpl->releaseBottomNode(PredSU);
768}
769
770/// releasePredecessors - Call releasePred on each of SU's predecessors.
772 for (SDep &Pred : SU->Preds)
773 releasePred(SU, &Pred);
774}
775
778 SchedImpl->enterMBB(bb);
779}
780
782 SchedImpl->leaveMBB();
784}
785
786/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
787/// after crossing a scheduling boundary. [begin, end) includes all instructions
788/// in the region, including the boundary itself and single-instruction regions
789/// that don't get scheduled.
793 unsigned regioninstrs)
794{
795 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
796
797 SchedImpl->initPolicy(begin, end, regioninstrs);
798}
799
800/// This is normally called from the main scheduler loop but may also be invoked
801/// by the scheduling strategy to perform additional code motion.
804 // Advance RegionBegin if the first instruction moves down.
805 if (&*RegionBegin == MI)
806 ++RegionBegin;
807
808 // Update the instruction stream.
809 BB->splice(InsertPos, BB, MI);
810
811 // Update LiveIntervals
812 if (LIS)
813 LIS->handleMove(*MI, /*UpdateFlags=*/true);
814
815 // Recede RegionBegin if an instruction moves above the first.
816 if (RegionBegin == InsertPos)
817 RegionBegin = MI;
818}
819
821#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
822 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
824 return false;
825 }
826 ++NumInstrsScheduled;
827#endif
828 return true;
829}
830
831/// Per-region scheduling driver, called back from
832/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
833/// that does not consider liveness or register pressure. It is useful for
834/// PostRA scheduling and potentially other custom schedulers.
836 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
837 LLVM_DEBUG(SchedImpl->dumpPolicy());
838
839 // Build the DAG.
841
843
844 SmallVector<SUnit*, 8> TopRoots, BotRoots;
845 findRootsAndBiasEdges(TopRoots, BotRoots);
846
847 LLVM_DEBUG(dump());
848 if (PrintDAGs) dump();
850
851 // Initialize the strategy before modifying the DAG.
852 // This may initialize a DFSResult to be used for queue priority.
853 SchedImpl->initialize(this);
854
855 // Initialize ready queues now that the DAG and priority data are finalized.
856 initQueues(TopRoots, BotRoots);
857
858 bool IsTopNode = false;
859 while (true) {
860 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
861 SUnit *SU = SchedImpl->pickNode(IsTopNode);
862 if (!SU) break;
863
864 assert(!SU->isScheduled && "Node already scheduled");
865 if (!checkSchedLimit())
866 break;
867
868 MachineInstr *MI = SU->getInstr();
869 if (IsTopNode) {
870 assert(SU->isTopReady() && "node still has unscheduled dependencies");
871 if (&*CurrentTop == MI)
873 else
875 } else {
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
879 if (&*priorII == MI)
880 CurrentBottom = priorII;
881 else {
882 if (&*CurrentTop == MI)
883 CurrentTop = nextIfDebug(++CurrentTop, priorII);
886 }
887 }
888 // Notify the scheduling strategy before updating the DAG.
889 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
890 // runs, it can then use the accurate ReadyCycle time to determine whether
891 // newly released nodes can move to the readyQ.
892 SchedImpl->schedNode(SU, IsTopNode);
893
894 updateQueues(SU, IsTopNode);
895 }
896 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
897
899
900 LLVM_DEBUG({
901 dbgs() << "*** Final schedule for "
902 << printMBBReference(*begin()->getParent()) << " ***\n";
903 dumpSchedule();
904 dbgs() << '\n';
905 });
906}
907
908/// Apply each ScheduleDAGMutation step in order.
910 for (auto &m : Mutations)
911 m->apply(this);
912}
913
916 SmallVectorImpl<SUnit*> &BotRoots) {
917 for (SUnit &SU : SUnits) {
918 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
919
920 // Order predecessors so DFSResult follows the critical path.
921 SU.biasCriticalPath();
922
923 // A SUnit is ready to top schedule if it has no predecessors.
924 if (!SU.NumPredsLeft)
925 TopRoots.push_back(&SU);
926 // A SUnit is ready to bottom schedule if it has no successors.
927 if (!SU.NumSuccsLeft)
928 BotRoots.push_back(&SU);
929 }
931}
932
933/// Identify DAG roots and setup scheduler queues.
935 ArrayRef<SUnit*> BotRoots) {
936 NextClusterSucc = nullptr;
937 NextClusterPred = nullptr;
938
939 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
940 //
941 // Nodes with unreleased weak edges can still be roots.
942 // Release top roots in forward order.
943 for (SUnit *SU : TopRoots)
944 SchedImpl->releaseTopNode(SU);
945
946 // Release bottom roots in reverse order so the higher priority nodes appear
947 // first. This is more natural and slightly more efficient.
949 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
950 SchedImpl->releaseBottomNode(*I);
951 }
952
955
956 SchedImpl->registerRoots();
957
958 // Advance past initial DebugValues.
961}
962
963/// Update scheduler queues after scheduling an instruction.
964void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
965 // Release dependent instructions for scheduling.
966 if (IsTopNode)
968 else
970
971 SU->isScheduled = true;
972}
973
974/// Reinsert any remaining debug_values, just like the PostRA scheduler.
976 // If first instruction was a DBG_VALUE then put it back.
977 if (FirstDbgValue) {
980 }
981
982 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
984 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
985 MachineInstr *DbgValue = P.first;
986 MachineBasicBlock::iterator OrigPrevMI = P.second;
987 if (&*RegionBegin == DbgValue)
988 ++RegionBegin;
989 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
992 }
993}
994
995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996static const char *scheduleTableLegend = " i: issue\n x: resource booked";
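// For reference, the trace printed below looks roughly like the following
// (an illustrative sketch, not captured tool output; "Pipe0" is a
// hypothetical resource name):
//
//   Cycle              | 0  | 1  | 2  |
//   SU(0)              | i  |    |    |
//            Pipe0     | x  |    |    |
//   SU(1)              |    | i  |    |
//            Pipe0     |    | x  |    |
//
// where "i" marks the issue cycle and "x" marks booked resource cycles, as
// the legend above states.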
997
999 // Bail out when there is no schedule model to query.
1001 return;
1002
1003 // Nothing to show if there is at most one instruction.
1004 if (BB->size() < 2)
1005 return;
1006
1007 dbgs() << " * Schedule table (TopDown):\n";
1008 dbgs() << scheduleTableLegend << "\n";
1009 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1010 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1011 for (MachineInstr &MI : *this) {
1012 SUnit *SU = getSUnit(&MI);
1013 if (!SU)
1014 continue;
1015 const MCSchedClassDesc *SC = getSchedClass(SU);
1018 PI != PE; ++PI) {
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1021 }
1022 }
1023 // Print the header with the cycles
1024 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1025 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1026 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1027 dbgs() << "|\n";
1028
1029 for (MachineInstr &MI : *this) {
1030 SUnit *SU = getSUnit(&MI);
1031 if (!SU) {
1032 dbgs() << "Missing SUnit\n";
1033 continue;
1034 }
1035 std::string NodeName("SU(");
1036 NodeName += std::to_string(SU->NodeNum) + ")";
1037 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1038 unsigned C = FirstCycle;
1039 for (; C <= LastCycle; ++C) {
1040 if (C == SU->TopReadyCycle)
1041 dbgs() << llvm::left_justify("| i", ColWidth);
1042 else
1043 dbgs() << llvm::left_justify("|", ColWidth);
1044 }
1045 dbgs() << "|\n";
1046 const MCSchedClassDesc *SC = getSchedClass(SU);
1047
1051
1053 llvm::stable_sort(ResourcesIt,
1054 [](const MCWriteProcResEntry &LHS,
1055 const MCWriteProcResEntry &RHS) -> bool {
1056 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1057 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1058 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1059 });
1060 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1061 C = FirstCycle;
1062 const std::string ResName =
1063 SchedModel.getResourceName(PI.ProcResourceIdx);
1064 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1066 dbgs() << llvm::left_justify("|", ColWidth);
1067 }
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1069 ++I, ++C)
1070 dbgs() << llvm::left_justify("| x", ColWidth);
1071 while (C++ <= LastCycle)
1072 dbgs() << llvm::left_justify("|", ColWidth);
1073 // Place end char
1074 dbgs() << "| \n";
1075 }
1076 }
1077}
1078
1080 // Bail out when there is no schedule model to query.
1082 return;
1083
1084 // Nothing to show if there is at most one instruction.
1085 if (BB->size() < 2)
1086 return;
1087
1088 dbgs() << " * Schedule table (BottomUp):\n";
1089 dbgs() << scheduleTableLegend << "\n";
1090
1091 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1092 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1093 for (MachineInstr &MI : *this) {
1094 SUnit *SU = getSUnit(&MI);
1095 if (!SU)
1096 continue;
1097 const MCSchedClassDesc *SC = getSchedClass(SU);
1100 PI != PE; ++PI) {
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1103 }
1104 }
1105 // Print the header with the cycles
1106 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1107 for (int C = FirstCycle; C >= LastCycle; --C)
1108 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1109 dbgs() << "|\n";
1110
1111 for (MachineInstr &MI : *this) {
1112 SUnit *SU = getSUnit(&MI);
1113 if (!SU) {
1114 dbgs() << "Missing SUnit\n";
1115 continue;
1116 }
1117 std::string NodeName("SU(");
1118 NodeName += std::to_string(SU->NodeNum) + ")";
1119 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1120 int C = FirstCycle;
1121 for (; C >= LastCycle; --C) {
1122 if (C == (int)SU->BotReadyCycle)
1123 dbgs() << llvm::left_justify("| i", ColWidth);
1124 else
1125 dbgs() << llvm::left_justify("|", ColWidth);
1126 }
1127 dbgs() << "|\n";
1128 const MCSchedClassDesc *SC = getSchedClass(SU);
1132
1134 llvm::stable_sort(ResourcesIt,
1135 [](const MCWriteProcResEntry &LHS,
1136 const MCWriteProcResEntry &RHS) -> bool {
1137 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1138 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1139 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1140 });
1141 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1142 C = FirstCycle;
1143 const std::string ResName =
1144 SchedModel.getResourceName(PI.ProcResourceIdx);
1145 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1147 dbgs() << llvm::left_justify("|", ColWidth);
1148 }
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150 ++I, --C)
1151 dbgs() << llvm::left_justify("| x", ColWidth);
1152 while (C-- >= LastCycle)
1153 dbgs() << llvm::left_justify("|", ColWidth);
1154 // Place end char
1155 dbgs() << "| \n";
1156 }
1157 }
1158}
1159#endif
1160
1161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1166 else if (DumpDir == DumpDirection::BottomUp)
1169 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1170 } else {
1171 dbgs() << "* Schedule table: DumpDirection not set.\n";
1172 }
1173 }
1174
1175 for (MachineInstr &MI : *this) {
1176 if (SUnit *SU = getSUnit(&MI))
1177 dumpNode(*SU);
1178 else
1179 dbgs() << "Missing SUnit\n";
1180 }
1181}
1182#endif
1183
1184//===----------------------------------------------------------------------===//
1185// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1186// preservation.
1187//===----------------------------------------------------------------------===//
1188
1190 delete DFSResult;
1191}
1192
1194 const MachineInstr &MI = *SU.getInstr();
1195 for (const MachineOperand &MO : MI.operands()) {
1196 if (!MO.isReg())
1197 continue;
1198 if (!MO.readsReg())
1199 continue;
1200 if (TrackLaneMasks && !MO.isUse())
1201 continue;
1202
1203 Register Reg = MO.getReg();
1204 if (!Reg.isVirtual())
1205 continue;
1206
1207 // Ignore re-defs.
1208 if (TrackLaneMasks) {
1209 bool FoundDef = false;
1210 for (const MachineOperand &MO2 : MI.all_defs()) {
1211 if (MO2.getReg() == Reg && !MO2.isDead()) {
1212 FoundDef = true;
1213 break;
1214 }
1215 }
1216 if (FoundDef)
1217 continue;
1218 }
1219
1220 // Record this local VReg use.
1222 for (; UI != VRegUses.end(); ++UI) {
1223 if (UI->SU == &SU)
1224 break;
1225 }
1226 if (UI == VRegUses.end())
1228 }
1229}
1230
1231/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1232/// crossing a scheduling boundary. [begin, end) includes all instructions in
1233/// the region, including the boundary itself and single-instruction regions
1234/// that don't get scheduled.
1238 unsigned regioninstrs)
1239{
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1241 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1242
1243 // For convenience remember the end of the liveness region.
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1245
1247
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1250
1252 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1253}
1254
1255// Setup the register pressure trackers for the top scheduled and bottom
1256// scheduled regions.
1258 VRegUses.clear();
1260 for (SUnit &SU : SUnits)
1261 collectVRegUses(SU);
1262
1264 ShouldTrackLaneMasks, false);
1266 ShouldTrackLaneMasks, false);
1267
1268 // Close the RPTracker to finalize live ins.
1270
1272
1273 // Initialize the live ins and live outs.
1276
1277 // Close one end of the tracker so we can call
1278 // getMaxUpward/DownwardPressureDelta before advancing across any
1279 // instructions. This converts currently live regs into live ins/outs.
1282
1284 if (!BotRPTracker.getLiveThru().empty()) {
1286 LLVM_DEBUG(dbgs() << "Live Thru: ";
1288 };
1289
1290 // For each live out vreg reduce the pressure change associated with other
1291 // uses of the same vreg below the live-out reaching def.
1293
1294 // Account for liveness generated by the region boundary.
1295 if (LiveRegionEnd != RegionEnd) {
1297 BotRPTracker.recede(&LiveUses);
1298 updatePressureDiffs(LiveUses);
1299 }
1300
1301 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1303 dbgs() << "Bottom Pressure:\n";
1305
1307 (RegionEnd->isDebugInstr() &&
1309 "Can't find the region bottom");
1310
1311 // Cache the list of excess pressure sets in this region. This will also track
1312 // the max pressure in the scheduled code for these sets.
1313 RegionCriticalPSets.clear();
1314 const std::vector<unsigned> &RegionPressure =
1316 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1318 if (RegionPressure[i] > Limit) {
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1320 << " Actual " << RegionPressure[i] << "\n");
1321 RegionCriticalPSets.push_back(PressureChange(i));
1322 }
1323 }
1324 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1325 for (const PressureChange &RCPS
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1328 dbgs() << "\n");
1329}
1330
1333 const std::vector<unsigned> &NewMaxPressure) {
1334 const PressureDiff &PDiff = getPressureDiff(SU);
1335 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1336 for (const PressureChange &PC : PDiff) {
1337 if (!PC.isValid())
1338 break;
1339 unsigned ID = PC.getPSet();
1340 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1341 ++CritIdx;
1342 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1343 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1345 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1346 }
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1348 if (NewMaxPressure[ID] >= Limit - 2) {
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1350 << NewMaxPressure[ID]
1351 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1352 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1353 << " livethru)\n");
1354 }
1355 }
1356}
1357
1358/// Update the PressureDiff array for liveness after scheduling this
1359/// instruction.
1361 ArrayRef<RegisterMaskPair> LiveUses) {
1362 for (const RegisterMaskPair &P : LiveUses) {
1363 Register Reg = P.RegUnit;
1364 /// FIXME: Currently assuming single-use physregs.
1365 if (!Reg.isVirtual())
1366 continue;
1367
1369 // If the register has just become live then other uses won't change
1370 // this fact anymore => decrement pressure.
1371 // If the register has just become dead then other uses make it come
1372 // back to life => increment pressure.
1373 bool Decrement = P.LaneMask.any();
1374
1375 for (const VReg2SUnit &V2SU
1376 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1377 SUnit &SU = *V2SU.SU;
1378 if (SU.isScheduled || &SU == &ExitSU)
1379 continue;
1380
1381 PressureDiff &PDiff = getPressureDiff(&SU);
1382 PDiff.addPressureChange(Reg, Decrement, &MRI);
1383 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1384 << printReg(Reg, TRI) << ':'
1385 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1386 dbgs() << " to "; PDiff.dump(*TRI););
1387 }
1388 } else {
1389 assert(P.LaneMask.any());
1390 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1391 // This may be called before CurrentBottom has been initialized. However,
1392 // BotRPTracker must have a valid position. We want the value live into the
1393 // instruction or live out of the block, so ask for the previous
1394 // instruction's live-out.
1395 const LiveInterval &LI = LIS->getInterval(Reg);
1396 VNInfo *VNI;
1399 if (I == BB->end())
1400 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1401 else {
1403 VNI = LRQ.valueIn();
1404 }
1405 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1406 assert(VNI && "No live value at use.");
1407 for (const VReg2SUnit &V2SU
1408 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1409 SUnit *SU = V2SU.SU;
1410 // If this use comes before the reaching def, it cannot be a last use,
1411 // so decrease its pressure change.
1412 if (!SU->isScheduled && SU != &ExitSU) {
1413 LiveQueryResult LRQ =
1415 if (LRQ.valueIn() == VNI) {
1416 PressureDiff &PDiff = getPressureDiff(SU);
1417 PDiff.addPressureChange(Reg, true, &MRI);
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419 << *SU->getInstr();
1420 dbgs() << " to "; PDiff.dump(*TRI););
1421 }
1422 }
1423 }
1424 }
1425 }
1426}
1427
1429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 if (EntrySU.getInstr() != nullptr)
1432 for (const SUnit &SU : SUnits) {
1433 dumpNodeAll(SU);
1434 if (ShouldTrackPressure) {
1435 dbgs() << " Pressure Diff : ";
1436 getPressureDiff(&SU).dump(*TRI);
1437 }
1438 dbgs() << " Single Issue : ";
1439 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1440 SchedModel.mustEndGroup(SU.getInstr()))
1441 dbgs() << "true;";
1442 else
1443 dbgs() << "false;";
1444 dbgs() << '\n';
1445 }
1446 if (ExitSU.getInstr() != nullptr)
1448#endif
1449}
1450
1451/// schedule - Called back from MachineScheduler::runOnMachineFunction
1452/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1453/// only includes instructions that have DAG nodes, not scheduling boundaries.
1454///
1455/// This is a skeletal driver, with all the functionality pushed into helpers,
1456/// so that it can be easily extended by experimental schedulers. Generally,
1457/// implementing MachineSchedStrategy should be sufficient to implement a new
1458/// scheduling algorithm. However, if a scheduler further subclasses
1459/// ScheduleDAGMILive then it will want to override this virtual method in order
1460/// to update any specialized state.
1462 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1463 LLVM_DEBUG(SchedImpl->dumpPolicy());
1465
1467
1468 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1469 findRootsAndBiasEdges(TopRoots, BotRoots);
1470
1471 // Initialize the strategy before modifying the DAG.
1472 // This may initialize a DFSResult to be used for queue priority.
1473 SchedImpl->initialize(this);
1474
1475 LLVM_DEBUG(dump());
1476 if (PrintDAGs) dump();
1478
1479 // Initialize ready queues now that the DAG and priority data are finalized.
1480 initQueues(TopRoots, BotRoots);
1481
1482 bool IsTopNode = false;
1483 while (true) {
1484 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1486 if (!SU) break;
1487
1488 assert(!SU->isScheduled && "Node already scheduled");
1489 if (!checkSchedLimit())
1490 break;
1491
1492 scheduleMI(SU, IsTopNode);
1493
1494 if (DFSResult) {
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1496 if (!ScheduledTrees.test(SubtreeID)) {
1497 ScheduledTrees.set(SubtreeID);
1498 DFSResult->scheduleTree(SubtreeID);
1499 SchedImpl->scheduleTree(SubtreeID);
1500 }
1501 }
1502
1503 // Notify the scheduling strategy after updating the DAG.
1504 SchedImpl->schedNode(SU, IsTopNode);
1505
1506 updateQueues(SU, IsTopNode);
1507 }
1508 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1509
1511
1512 LLVM_DEBUG({
1513 dbgs() << "*** Final schedule for "
1514 << printMBBReference(*begin()->getParent()) << " ***\n";
1515 dumpSchedule();
1516 dbgs() << '\n';
1517 });
1518}
1519
1520/// Build the DAG and setup three register pressure trackers.
1522 if (!ShouldTrackPressure) {
1523 RPTracker.reset();
1524 RegionCriticalPSets.clear();
1526 return;
1527 }
1528
1529 // Initialize the register pressure tracker used by buildSchedGraph.
1531 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1532
1533 // Account for liveness generated by the region boundary.
1534 if (LiveRegionEnd != RegionEnd)
1535 RPTracker.recede();
1536
1537 // Build the DAG, and compute current register pressure.
1539
1540 // Initialize top/bottom trackers after computing region pressure.
1542}
1543
1545 if (!DFSResult)
1546 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1547 DFSResult->clear();
1549 DFSResult->resize(SUnits.size());
1552}
1553
1554/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1555/// only provides the critical path for single block loops. To handle loops that
1556/// span blocks, we could use the vreg path latencies provided by
1557/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1558/// available for use in the scheduler.
1559///
1560/// The cyclic path estimation identifies a def-use pair that crosses the back
1561/// edge and considers the depth and height of the nodes. For example, consider
1562/// the following instruction sequence where each instruction has unit latency
1563/// and defines an eponymous virtual register:
1564///
1565/// a->b(a,c)->c(b)->d(c)->exit
1566///
1567/// The cyclic critical path is two cycles: b->c->b
1568/// The acyclic critical path is four cycles: a->b->c->d->exit
1569/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572/// LiveInDepth = depth(b) = len(a->b) = 1
1573///
1574/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1576/// CyclicCriticalPath = min(2, 2) = 2
1577///
1578/// This could be relevant to PostRA scheduling, but is currently implemented
1579/// assuming LiveIntervals.
1581 // This only applies to single-block loops.
1582 if (!BB->isSuccessor(BB))
1583 return 0;
1584
1585 unsigned MaxCyclicLatency = 0;
1586 // Visit each live out vreg def to find def/use pairs that cross iterations.
1588 Register Reg = P.RegUnit;
1589 if (!Reg.isVirtual())
1590 continue;
1591 const LiveInterval &LI = LIS->getInterval(Reg);
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1593 if (!DefVNI)
1594 continue;
1595
1597 const SUnit *DefSU = getSUnit(DefMI);
1598 if (!DefSU)
1599 continue;
1600
1601 unsigned LiveOutHeight = DefSU->getHeight();
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1603 // Visit all local users of the vreg def.
1604 for (const VReg2SUnit &V2SU
1605 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1606 SUnit *SU = V2SU.SU;
1607 if (SU == &ExitSU)
1608 continue;
1609
1610 // Only consider uses of the phi.
1612 if (!LRQ.valueIn()->isPHIDef())
1613 continue;
1614
1615 // Assume that a path spanning two iterations is a cycle, which could
1616 // overestimate in strange cases. This allows cyclic latency to be
1617 // estimated as the minimum slack of the vreg's depth or height.
1618 unsigned CyclicLatency = 0;
1619 if (LiveOutDepth > SU->getDepth())
1620 CyclicLatency = LiveOutDepth - SU->getDepth();
1621
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1623 if (LiveInHeight > LiveOutHeight) {
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625 CyclicLatency = LiveInHeight - LiveOutHeight;
1626 } else
1627 CyclicLatency = 0;
1628
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1631 if (CyclicLatency > MaxCyclicLatency)
1632 MaxCyclicLatency = CyclicLatency;
1633 }
1634 }
1635 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1636 return MaxCyclicLatency;
1637}
1638
1639/// Release ExitSU predecessors and setup scheduler queues. Re-position
1640/// the Top RP tracker in case the region beginning has changed.
1642 ArrayRef<SUnit*> BotRoots) {
1643 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1644 if (ShouldTrackPressure) {
1645 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1647 }
1648}
1649
1650/// Move an instruction and update register pressure.
1651void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1652 // Move the instruction to its new location in the instruction stream.
1653 MachineInstr *MI = SU->getInstr();
1654
1655 if (IsTopNode) {
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1657 if (&*CurrentTop == MI)
1659 else {
1662 }
1663
1664 if (ShouldTrackPressure) {
1665 // Update top scheduled pressure.
1666 RegisterOperands RegOpers;
1667 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1668 /*IgnoreDead=*/false);
1670 // Adjust liveness and add missing dead+read-undef flags.
1671 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1672 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1673 } else {
1674 // Adjust for missing dead-def flags.
1675 RegOpers.detectDeadDefs(*MI, *LIS);
1676 }
1677
1678 TopRPTracker.advance(RegOpers);
1679 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1680 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1682
1684 }
1685 } else {
1686 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1689 if (&*priorII == MI)
1690 CurrentBottom = priorII;
1691 else {
1692 if (&*CurrentTop == MI) {
1693 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1695 }
1697 CurrentBottom = MI;
1699 }
1700 if (ShouldTrackPressure) {
1701 RegisterOperands RegOpers;
1702 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1703 /*IgnoreDead=*/false);
1705 // Adjust liveness and add missing dead+read-undef flags.
1706 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1707 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1708 } else {
1709 // Adjust for missing dead-def flags.
1710 RegOpers.detectDeadDefs(*MI, *LIS);
1711 }
1712
1716 BotRPTracker.recede(RegOpers, &LiveUses);
1717 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1718 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1720
1722 updatePressureDiffs(LiveUses);
1723 }
1724 }
1725}
1726
1727//===----------------------------------------------------------------------===//
1728// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1729//===----------------------------------------------------------------------===//
1730
1731namespace {
1732
1733/// Post-process the DAG to create cluster edges between neighboring
1734/// loads or between neighboring stores.
1735class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1736 struct MemOpInfo {
1737 SUnit *SU;
1739 int64_t Offset;
1740 LocationSize Width;
1741 bool OffsetIsScalable;
1742
1743 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1744 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1745 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1746 OffsetIsScalable(OffsetIsScalable) {}
1747
1748 static bool Compare(const MachineOperand *const &A,
1749 const MachineOperand *const &B) {
1750 if (A->getType() != B->getType())
1751 return A->getType() < B->getType();
1752 if (A->isReg())
1753 return A->getReg() < B->getReg();
1754 if (A->isFI()) {
1755 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1757 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1759 return StackGrowsDown ? A->getIndex() > B->getIndex()
1760 : A->getIndex() < B->getIndex();
1761 }
1762
1763 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1764 "index bases.");
1765 }
1766
1767 bool operator<(const MemOpInfo &RHS) const {
1768 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1769 // comparison instead when it's available.
1770 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1771 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1772 Compare))
1773 return true;
1774 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1775 BaseOps.begin(), BaseOps.end(), Compare))
1776 return false;
1777 if (Offset != RHS.Offset)
1778 return Offset < RHS.Offset;
1779 return SU->NodeNum < RHS.SU->NodeNum;
1780 }
1781 };
1782
1783 const TargetInstrInfo *TII;
1784 const TargetRegisterInfo *TRI;
1785 bool IsLoad;
1786 bool ReorderWhileClustering;
1787
1788public:
1789 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1790 const TargetRegisterInfo *tri, bool IsLoad,
1791 bool ReorderWhileClustering)
1792 : TII(tii), TRI(tri), IsLoad(IsLoad),
1793 ReorderWhileClustering(ReorderWhileClustering) {}
1794
1795 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1796
1797protected:
1798 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1799 ScheduleDAGInstrs *DAG);
1800 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1801 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1802 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1804};
1805
1806class StoreClusterMutation : public BaseMemOpClusterMutation {
1807public:
1808 StoreClusterMutation(const TargetInstrInfo *tii,
1809 const TargetRegisterInfo *tri,
1810 bool ReorderWhileClustering)
1811 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1812};
1813
1814class LoadClusterMutation : public BaseMemOpClusterMutation {
1815public:
1816 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1817 bool ReorderWhileClustering)
1818 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1819};
1820
1821} // end anonymous namespace
1822
1823namespace llvm {
1824
1825std::unique_ptr<ScheduleDAGMutation>
1827 const TargetRegisterInfo *TRI,
1828 bool ReorderWhileClustering) {
1829 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1830 TII, TRI, ReorderWhileClustering)
1831 : nullptr;
1832}
1833
1834std::unique_ptr<ScheduleDAGMutation>
1836 const TargetRegisterInfo *TRI,
1837 bool ReorderWhileClustering) {
1838 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1839 TII, TRI, ReorderWhileClustering)
1840 : nullptr;
1841}
1842
1843} // end namespace llvm
1844
1845// Sort all the loads/stores first, then for each load/store, check the
1846// following loads/stores one by one until the first non-dependent one is
1847// reached, and call the target hook to see if they can be clustered.
1848// If FastCluster is enabled, we assume that all the loads/stores have been
1849// preprocessed and have no dependencies on each other.
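//
// Hypothetical example (AArch64-flavored, not taken from this file): two
// loads
//   ldr w1, [x0, #0]   ; SU(a): BaseOps = {x0}, Offset = 0
//   ldr w2, [x0, #4]   ; SU(b): BaseOps = {x0}, Offset = 4
// with no dependency between them may be accepted by the target's
// shouldClusterMemOps(), in which case a weak Cluster edge SU(a) -> SU(b)
// is added so the scheduler tries to keep them adjacent.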
1850void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1851 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1852 ScheduleDAGInstrs *DAG) {
1853 // Keep track of the current cluster length and bytes for each SUnit.
1855
1856 // At this point, the `MemOpRecords` array must hold at least two mem ops.
1857 // Try to cluster the mem ops collected within the `MemOpRecords` array.
1858 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1859 // Decision to cluster mem ops is taken based on target dependent logic
1860 auto MemOpa = MemOpRecords[Idx];
1861
1862 // Seek for the next load/store to do the cluster.
1863 unsigned NextIdx = Idx + 1;
1864 for (; NextIdx < End; ++NextIdx)
1865 // Skip if MemOpb has been clustered already or has dependency with
1866 // MemOpa.
1867 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1868 (FastCluster ||
1869 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1870 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1871 break;
1872 if (NextIdx == End)
1873 continue;
1874
1875 auto MemOpb = MemOpRecords[NextIdx];
1876 unsigned ClusterLength = 2;
1877 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1878 MemOpb.Width.getValue().getKnownMinValue();
1879 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1880 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1881 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1882 MemOpb.Width.getValue().getKnownMinValue();
1883 }
1884
1885 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1886 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1887 MemOpb.Offset, MemOpb.OffsetIsScalable,
1888 ClusterLength, CurrentClusterBytes))
1889 continue;
1890
1891 SUnit *SUa = MemOpa.SU;
1892 SUnit *SUb = MemOpb.SU;
1893 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1894 std::swap(SUa, SUb);
1895
1896 // FIXME: Is this check really required?
1897 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1898 continue;
1899
1900 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1901 << SUb->NodeNum << ")\n");
1902 ++NumClustered;
1903
1904 if (IsLoad) {
1905 // Copy successor edges from SUa to SUb. Interleaving computation
1906 // dependent on SUa can prevent load combining due to register reuse.
1907 // Predecessor edges do not need to be copied from SUb to SUa since
1908 // nearby loads should have effectively the same inputs.
1909 for (const SDep &Succ : SUa->Succs) {
1910 if (Succ.getSUnit() == SUb)
1911 continue;
1912 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1913 << ")\n");
1914 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1915 }
1916 } else {
1917 // Copy predecessor edges from SUb to SUa so that the SUnits SUb
1918 // depends on cannot be scheduled in between SUb and SUa. Successor
1919 // edges do not need to be copied from SUa to SUb since nothing will
1920 // depend on stores.
1921 // Note that we don't need to care about memory dependencies here: mem
1922 // ops with any memory dependency between them are never clustered.
1923 for (const SDep &Pred : SUb->Preds) {
1924 if (Pred.getSUnit() == SUa)
1925 continue;
1926 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1927 << ")\n");
1928 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1929 }
1930 }
1931
1932 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1933 CurrentClusterBytes};
1934
1935 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1936 << ", Curr cluster bytes: " << CurrentClusterBytes
1937 << "\n");
1938 }
1939}
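// Worked example (hypothetical values): three 8-byte loads from the same base
// at offsets 0, 8 and 16, sorted into MemOpRecords. Pairing SU(0)/SU(1) first
// queries shouldClusterMemOps() with ClusterLength=2 and CurrentClusterBytes=16;
// if accepted, SUnit2ClusterInfo records {2, 16} for SU(1). Pairing
// SU(1)/SU(2) then extends the existing cluster, so the hook is queried with
// ClusterLength=3 and CurrentClusterBytes=24. Whether each pair is actually
// clustered remains a target decision.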
1940
1941void BaseMemOpClusterMutation::collectMemOpRecords(
1942 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1943 for (auto &SU : SUnits) {
1944 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1945 (!IsLoad && !SU.getInstr()->mayStore()))
1946 continue;
1947
1948 const MachineInstr &MI = *SU.getInstr();
1949 SmallVector<const MachineOperand *, 4> BaseOps;
1950 int64_t Offset;
1951 bool OffsetIsScalable;
1952 LocationSize Width = 0;
1953 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1954 OffsetIsScalable, Width, TRI)) {
1955 MemOpRecords.push_back(
1956 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1957
1958 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1959 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1960 << ", Width: " << Width << "\n");
1961 }
1962#ifndef NDEBUG
1963 for (const auto *Op : BaseOps)
1964 assert(Op);
1965#endif
1966 }
1967}
1968
1969bool BaseMemOpClusterMutation::groupMemOps(
1970 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1971 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1972 bool FastCluster =
1974 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1975
1976 for (const auto &MemOp : MemOps) {
1977 unsigned ChainPredID = DAG->SUnits.size();
1978 if (FastCluster) {
1979 for (const SDep &Pred : MemOp.SU->Preds) {
1980 // We only want to cluster the mem ops that have the same ctrl (non-data)
1981 // pred so that they have no ctrl dependency on each other. But for
1982 // store instrs, we can still cluster them if the pred is a load instr.
1983 if ((Pred.isCtrl() &&
1984 (IsLoad ||
1985 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1986 !Pred.isArtificial()) {
1987 ChainPredID = Pred.getSUnit()->NodeNum;
1988 break;
1989 }
1990 }
1991 } else
1992 ChainPredID = 0;
1993
1994 Groups[ChainPredID].push_back(MemOp);
1995 }
1996 return FastCluster;
1997}
1998
1999/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2000void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2001 // Collect all the clusterable loads/stores
2002 SmallVector<MemOpInfo, 32> MemOpRecords;
2003 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2004
2005 if (MemOpRecords.size() < 2)
2006 return;
2007
2008 // If the DAG is too complex, use a heuristic to put loads/stores that
2009 // have no dependency into the same group, to avoid a compile-time blow-up.
2010 // Note that some fusion pairs may be lost this way.
2011 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2012 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2013
2014 for (auto &Group : Groups) {
2015 // Sort the loads/stores so that clustering can stop as early as
2016 // possible.
2017 llvm::sort(Group.second);
2018
2019 // Try to cluster all the neighboring loads/stores.
2020 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2021 }
2022}
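// Worked example (hypothetical sizes): with 200 collected mem ops in a DAG of
// 10,000 SUnits, 200 * 10000 / 1000 = 2000. If that exceeds
// FastClusterThreshold, FastCluster mode is used: mem ops are grouped by their
// control-dependence predecessor instead of relying on IsReachable() queries,
// trading some missed clusters for compile time.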
2023
2024//===----------------------------------------------------------------------===//
2025// CopyConstrain - DAG post-processing to encourage copy elimination.
2026//===----------------------------------------------------------------------===//
2027
2028namespace {
2029
2030/// Post-process the DAG to create weak edges from all uses of a copy to
2031/// the one use that defines the copy's source vreg, most likely an induction
2032/// variable increment.
2033class CopyConstrain : public ScheduleDAGMutation {
2034 // Transient state.
2035 SlotIndex RegionBeginIdx;
2036
2037 // RegionEndIdx is the slot index of the last non-debug instruction in the
2038 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2039 SlotIndex RegionEndIdx;
2040
2041public:
2042 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2043
2044 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2045
2046protected:
2047 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2048};
2049
2050} // end anonymous namespace
2051
2052namespace llvm {
2053
2054std::unique_ptr<ScheduleDAGMutation>
2055createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2056 const TargetRegisterInfo *TRI) {
2057 return std::make_unique<CopyConstrain>(TII, TRI);
2058}
2059
2060} // end namespace llvm
2061
2062/// constrainLocalCopy handles two possibilities:
2063/// 1) Local src:
2064/// I0: = dst
2065/// I1: src = ...
2066/// I2: = dst
2067/// I3: dst = src (copy)
2068/// (create pred->succ edges I0->I1, I2->I1)
2069///
2070/// 2) Local copy:
2071/// I0: dst = src (copy)
2072/// I1: = dst
2073/// I2: src = ...
2074/// I3: = dst
2075/// (create pred->succ edges I1->I2, I3->I2)
2076///
2077/// Although the MachineScheduler is currently constrained to single blocks,
2078/// this algorithm should handle extended blocks. An EBB is a set of
2079/// contiguously numbered blocks such that the previous block in the EBB is
2080/// always the single predecessor.
2081void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2082 LiveIntervals *LIS = DAG->getLIS();
2083 MachineInstr *Copy = CopySU->getInstr();
2084
2085 // Check for pure vreg copies.
2086 const MachineOperand &SrcOp = Copy->getOperand(1);
2087 Register SrcReg = SrcOp.getReg();
2088 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2089 return;
2090
2091 const MachineOperand &DstOp = Copy->getOperand(0);
2092 Register DstReg = DstOp.getReg();
2093 if (!DstReg.isVirtual() || DstOp.isDead())
2094 return;
2095
2096 // Check if either the dest or source is local. If it's live across a back
2097 // edge, it's not local. Note that if both vregs are live across the back
2098 // edge, we cannot successfully constrain the copy without cyclic scheduling.
2099 // If both the copy's source and dest are local live intervals, then we
2100 // should treat the dest as the global for the purpose of adding
2101 // constraints. This adds edges from source's other uses to the copy.
2102 unsigned LocalReg = SrcReg;
2103 unsigned GlobalReg = DstReg;
2104 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2105 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2106 LocalReg = DstReg;
2107 GlobalReg = SrcReg;
2108 LocalLI = &LIS->getInterval(LocalReg);
2109 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2110 return;
2111 }
2112 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2113
2114 // Find the global segment after the start of the local LI.
2115 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2116 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2117 // local live range. We could create edges from other global uses to the local
2118 // start, but the coalescer should have already eliminated these cases, so
2119 // don't bother dealing with it.
2120 if (GlobalSegment == GlobalLI->end())
2121 return;
2122
2123 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2124 // returned the next global segment. But if GlobalSegment overlaps with
2125 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2126 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2127 if (GlobalSegment->contains(LocalLI->beginIndex()))
2128 ++GlobalSegment;
2129
2130 if (GlobalSegment == GlobalLI->end())
2131 return;
2132
2133 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2134 if (GlobalSegment != GlobalLI->begin()) {
2135 // Two address defs have no hole.
2136 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2137 GlobalSegment->start)) {
2138 return;
2139 }
2140 // If the prior global segment may be defined by the same two-address
2141 // instruction that also defines LocalLI, then we can't make a hole here.
2142 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2143 LocalLI->beginIndex())) {
2144 return;
2145 }
2146 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2147 // it would be a disconnected component in the live range.
2148 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2149 "Disconnected LRG within the scheduling region.");
2150 }
2151 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2152 if (!GlobalDef)
2153 return;
2154
2155 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2156 if (!GlobalSU)
2157 return;
2158
2159 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2160 // constraining the uses of the last local def to precede GlobalDef.
2161 SmallVector<SUnit*,8> LocalUses;
2162 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2163 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2164 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2165 for (const SDep &Succ : LastLocalSU->Succs) {
2166 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2167 continue;
2168 if (Succ.getSUnit() == GlobalSU)
2169 continue;
2170 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2171 return;
2172 LocalUses.push_back(Succ.getSUnit());
2173 }
2174 // Open the top of the GlobalLI hole by constraining any earlier global uses
2175 // to precede the start of LocalLI.
2176 SmallVector<SUnit*,8> GlobalUses;
2177 MachineInstr *FirstLocalDef =
2178 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2179 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2180 for (const SDep &Pred : GlobalSU->Preds) {
2181 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2182 continue;
2183 if (Pred.getSUnit() == FirstLocalSU)
2184 continue;
2185 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2186 return;
2187 GlobalUses.push_back(Pred.getSUnit());
2188 }
2189 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2190 // Add the weak edges.
2191 for (SUnit *LU : LocalUses) {
2192 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2193 << GlobalSU->NodeNum << ")\n");
2194 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2195 }
2196 for (SUnit *GU : GlobalUses) {
2197 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2198 << FirstLocalSU->NodeNum << ")\n");
2199 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2200 }
2201}
2202
2203/// Callback from DAG postProcessing to create weak edges to encourage
2204/// copy elimination.
2205void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2206 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2207 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2208
2209 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2210 if (FirstPos == DAG->end())
2211 return;
2212 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2213 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2214 *priorNonDebug(DAG->end(), DAG->begin()));
2215
2216 for (SUnit &SU : DAG->SUnits) {
2217 if (!SU.getInstr()->isCopy())
2218 continue;
2219
2220 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2221 }
2222}
2223
2224//===----------------------------------------------------------------------===//
2225// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2226// and possibly other custom schedulers.
2227//===----------------------------------------------------------------------===//
2228
2229static const unsigned InvalidCycle = ~0U;
2230
2232
2233/// Given a Count of resource usage and a Latency value, return true if a
2234/// SchedBoundary becomes resource limited.
2235/// If we are checking after scheduling a node, we should return true when
2236/// we just reach the resource limit.
2237static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2238 unsigned Latency, bool AfterSchedNode) {
2239 int ResCntFactor = (int)(Count - (Latency * LFactor));
2240 if (AfterSchedNode)
2241 return ResCntFactor >= (int)LFactor;
2242 else
2243 return ResCntFactor > (int)LFactor;
2244}
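// Worked example (hypothetical numbers): with LFactor=2, Count=14 and
// Latency=6, ResCntFactor = 14 - 6*2 = 2. After scheduling a node
// (AfterSchedNode=true) this satisfies ResCntFactor >= LFactor, so the zone is
// considered resource limited; before scheduling a node the comparison is
// strict, and the same numbers would not qualify.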
2245
2247 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2248 // Destroying and reconstructing it is very expensive though. So keep
2249 // invalid, placeholder HazardRecs.
2250 if (HazardRec && HazardRec->isEnabled()) {
2251 delete HazardRec;
2252 HazardRec = nullptr;
2253 }
2254 Available.clear();
2255 Pending.clear();
2256 CheckPending = false;
2257 CurrCycle = 0;
2258 CurrMOps = 0;
2259 MinReadyCycle = std::numeric_limits<unsigned>::max();
2260 ExpectedLatency = 0;
2261 DependentLatency = 0;
2262 RetiredMOps = 0;
2263 MaxExecutedResCount = 0;
2264 ZoneCritResIdx = 0;
2265 IsResourceLimited = false;
2266 ReservedCycles.clear();
2267 ReservedResourceSegments.clear();
2268 ReservedCyclesIndex.clear();
2269 ResourceGroupSubUnitMasks.clear();
2270#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2271 // Track the maximum number of stall cycles that could arise either from the
2272 // latency of a DAG edge or the number of cycles that a processor resource is
2273 // reserved (SchedBoundary::ReservedCycles).
2274 MaxObservedStall = 0;
2275#endif
2276 // Reserve a zero-count for invalid CritResIdx.
2277 ExecutedResCounts.resize(1);
2278 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2279}
2280
2282init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2283 reset();
2284 if (!SchedModel->hasInstrSchedModel())
2285 return;
2287 for (SUnit &SU : DAG->SUnits) {
2288 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2289 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2290 * SchedModel->getMicroOpFactor();
2292 PI = SchedModel->getWriteProcResBegin(SC),
2293 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2294 unsigned PIdx = PI->ProcResourceIdx;
2295 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2296 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2297 RemainingCounts[PIdx] +=
2298 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2299 }
2300 }
2301}
2302
2304init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2305 reset();
2306 DAG = dag;
2307 SchedModel = smodel;
2308 Rem = rem;
2310 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2311 ReservedCyclesIndex.resize(ResourceCount);
2312 ExecutedResCounts.resize(ResourceCount);
2313 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2314 unsigned NumUnits = 0;
2315
2316 for (unsigned i = 0; i < ResourceCount; ++i) {
2317 ReservedCyclesIndex[i] = NumUnits;
2318 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2319 if (isUnbufferedGroup(i)) {
2320 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2321 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2322 U != UE; ++U)
2323 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2324 }
2325 }
2326
2327 ReservedCycles.resize(NumUnits, InvalidCycle);
2328 }
2329}
2330
2331/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2332/// these "soft stalls" differently than the hard stall cycles based on CPU
2333/// resources and computed by checkHazard(). A fully in-order model
2334/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2335/// available for scheduling until they are ready. However, a weaker in-order
2336/// model may use this for heuristics. For example, if a processor has in-order
2337/// behavior when reading certain resources, this may come into play.
2339 if (!SU->isUnbuffered)
2340 return 0;
2341
2342 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2343 if (ReadyCycle > CurrCycle)
2344 return ReadyCycle - CurrCycle;
2345 return 0;
2346}
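// Worked example (hypothetical numbers): an unbuffered SU with TopReadyCycle=9
// queried at CurrCycle=6 in the top zone reports 3 soft stall cycles. SUs that
// are not marked isUnbuffered always report 0, since this heuristic only
// models weakly in-order (unbuffered) resources.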
2347
2348/// Compute the next cycle at which the given processor resource unit
2349/// can be scheduled.
2351 unsigned ReleaseAtCycle,
2352 unsigned AcquireAtCycle) {
2354 if (isTop())
2355 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2356 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2357
2358 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2359 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2360 }
2361
2362 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2363 // If this resource has never been used, always return cycle zero.
2364 if (NextUnreserved == InvalidCycle)
2365 return CurrCycle;
2366 // For bottom-up scheduling add the cycles needed for the current operation.
2367 if (!isTop())
2368 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2369 return NextUnreserved;
2370}
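// Worked example (hypothetical numbers): in bottom-up mode with CurrCycle=7, a
// unit whose ReservedCycles entry is 5 and an operation with ReleaseAtCycle=3
// yields NextUnreserved = max(7, 5 + 3) = 8, i.e. the operation cannot be
// placed before cycle 8 without overlapping the earlier reservation.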
2371
2372/// Compute the next cycle at which the given processor resource can be
2373/// scheduled. Returns the next cycle and the index of the processor resource
2374/// instance in the reserved cycles vector.
2375std::pair<unsigned, unsigned>
2377 unsigned ReleaseAtCycle,
2378 unsigned AcquireAtCycle) {
2380 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2382 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2383 }
2384 unsigned MinNextUnreserved = InvalidCycle;
2385 unsigned InstanceIdx = 0;
2386 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2387 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2388 assert(NumberOfInstances > 0 &&
2389 "Cannot have zero instances of a ProcResource");
2390
2391 if (isUnbufferedGroup(PIdx)) {
2392 // If any subunits are used by the instruction, report that the
2393 // subunits of the resource group are available at the first cycle
2394 // in which the unit is available, effectively removing the group
2395 // record from hazarding and basing the hazarding decisions on the
2396 // subunit records. Otherwise, choose the first available instance
2397 // from among the subunits. Specifications which assign cycles to
2398 // both the subunits and the group or which use an unbuffered
2399 // group with buffered subunits will appear to schedule
2400 // strangely. In the first case, the additional cycles for the
2401 // group will be ignored. In the second, the group will be
2402 // ignored entirely.
2403 for (const MCWriteProcResEntry &PE :
2406 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2407 return std::make_pair(getNextResourceCycleByInstance(
2408 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2409 StartIndex);
2410
2411 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2412 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2413 unsigned NextUnreserved, NextInstanceIdx;
2414 std::tie(NextUnreserved, NextInstanceIdx) =
2415 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2416 if (MinNextUnreserved > NextUnreserved) {
2417 InstanceIdx = NextInstanceIdx;
2418 MinNextUnreserved = NextUnreserved;
2419 }
2420 }
2421 return std::make_pair(MinNextUnreserved, InstanceIdx);
2422 }
2423
2424 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2425 ++I) {
2426 unsigned NextUnreserved =
2427 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2429 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2430 << NextUnreserved << "c\n");
2431 if (MinNextUnreserved > NextUnreserved) {
2432 InstanceIdx = I;
2433 MinNextUnreserved = NextUnreserved;
2434 }
2435 }
2437 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2438 << "[" << InstanceIdx - StartIndex << "]"
2439 << " available @" << MinNextUnreserved << "c"
2440 << "\n");
2441 return std::make_pair(MinNextUnreserved, InstanceIdx);
2442}
2443
2444/// Does this SU have a hazard within the current instruction group.
2445///
2446/// The scheduler supports two modes of hazard recognition. The first is the
2447/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2448/// supports highly complicated in-order reservation tables
2449/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2450///
2451/// The second is a streamlined mechanism that checks for hazards based on
2452/// simple counters that the scheduler itself maintains. It explicitly checks
2453/// for instruction dispatch limitations, including the number of micro-ops that
2454/// can dispatch per cycle.
2455///
2456/// TODO: Also check whether the SU must start a new group.
2458 if (HazardRec->isEnabled()
2460 return true;
2461 }
2462
2463 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2464 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2465 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2466 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2467 return true;
2468 }
2469
2470 if (CurrMOps > 0 &&
2471 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2472 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2473 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2474 << (isTop() ? "begin" : "end") << " group\n");
2475 return true;
2476 }
2477
2479 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2480 for (const MCWriteProcResEntry &PE :
2483 unsigned ResIdx = PE.ProcResourceIdx;
2484 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2485 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2486 unsigned NRCycle, InstanceIdx;
2487 std::tie(NRCycle, InstanceIdx) =
2488 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2489 if (NRCycle > CurrCycle) {
2490#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2491 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2492#endif
2493 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2494 << SchedModel->getResourceName(ResIdx)
2495 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2496 << "=" << NRCycle << "c\n");
2497 return true;
2498 }
2499 }
2500 }
2501 return false;
2502}
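// Worked example (hypothetical numbers): with IssueWidth=4 and CurrMOps=3, an
// SU whose scheduling class expands to 2 micro-ops gives CurrMOps + uops = 5 >
// 4, so checkHazard() reports a hazard and the node is deferred until
// bumpCycle() starts a new issue group.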
2503
2504// Find the unscheduled node in ReadySUs with the highest latency.
2507 SUnit *LateSU = nullptr;
2508 unsigned RemLatency = 0;
2509 for (SUnit *SU : ReadySUs) {
2510 unsigned L = getUnscheduledLatency(SU);
2511 if (L > RemLatency) {
2512 RemLatency = L;
2513 LateSU = SU;
2514 }
2515 }
2516 if (LateSU) {
2517 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2518 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2519 }
2520 return RemLatency;
2521}
2522
2523// Count resources in this zone and the remaining unscheduled
2524// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2525// resource index, or zero if the zone is issue limited.
2527getOtherResourceCount(unsigned &OtherCritIdx) {
2528 OtherCritIdx = 0;
2530 return 0;
2531
2532 unsigned OtherCritCount = Rem->RemIssueCount
2533 + (RetiredMOps * SchedModel->getMicroOpFactor());
2534 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2535 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2536 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2537 PIdx != PEnd; ++PIdx) {
2538 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2539 if (OtherCount > OtherCritCount) {
2540 OtherCritCount = OtherCount;
2541 OtherCritIdx = PIdx;
2542 }
2543 }
2544 if (OtherCritIdx) {
2545 LLVM_DEBUG(
2546 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2547 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2548 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2549 }
2550 return OtherCritCount;
2551}
2552
2553void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2554 unsigned Idx) {
2555 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2556
2557#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2558 // ReadyCycle was bumped up to the CurrCycle when this node was
2559 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2560 // scheduling, so may now be greater than ReadyCycle.
2561 if (ReadyCycle > CurrCycle)
2562 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2563#endif
2564
2565 if (ReadyCycle < MinReadyCycle)
2566 MinReadyCycle = ReadyCycle;
2567
2568 // Check for interlocks first. For the purpose of other heuristics, an
2569 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2570 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2571 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2573
2574 if (!HazardDetected) {
2575 Available.push(SU);
2576
2577 if (InPQueue)
2579 return;
2580 }
2581
2582 if (!InPQueue)
2583 Pending.push(SU);
2584}
2585
2586/// Move the boundary of scheduled code by one cycle.
2587void SchedBoundary::bumpCycle(unsigned NextCycle) {
2588 if (SchedModel->getMicroOpBufferSize() == 0) {
2589 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2590 "MinReadyCycle uninitialized");
2591 if (MinReadyCycle > NextCycle)
2592 NextCycle = MinReadyCycle;
2593 }
2594 // Update the current micro-ops, which will issue in the next cycle.
2595 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2596 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2597
2598 // Decrement DependentLatency based on the next cycle.
2599 if ((NextCycle - CurrCycle) > DependentLatency)
2600 DependentLatency = 0;
2601 else
2602 DependentLatency -= (NextCycle - CurrCycle);
2603
2604 if (!HazardRec->isEnabled()) {
2605 // Bypass HazardRec virtual calls.
2606 CurrCycle = NextCycle;
2607 } else {
2608 // Bypass getHazardType calls in case of long latency.
2609 for (; CurrCycle != NextCycle; ++CurrCycle) {
2610 if (isTop())
2612 else
2614 }
2615 }
2616 CheckPending = true;
2617 IsResourceLimited =
2619 getScheduledLatency(), true);
2620
2621 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2622 << '\n');
2623}
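// Worked example (hypothetical numbers): advancing from CurrCycle=10 to
// NextCycle=12 with IssueWidth=4 gives DecMOps = 4 * 2 = 8, so a CurrMOps of 3
// is cleared to 0, and DependentLatency shrinks by the same two cycles
// (saturating at 0).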
2624
2625void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2626 ExecutedResCounts[PIdx] += Count;
2627 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2628 MaxExecutedResCount = ExecutedResCounts[PIdx];
2629}
2630
2631/// Add the given processor resource to this scheduled zone.
2632///
2633/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2634/// cycles during which this resource is released.
2635///
2636/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2637/// cycles at which the resource is acquired after issue (assuming no stalls).
2638///
2639/// \return the next cycle at which the instruction may execute without
2640/// oversubscribing resources.
2641unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2642 unsigned ReleaseAtCycle,
2643 unsigned NextCycle,
2644 unsigned AcquireAtCycle) {
2645 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2646 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2647 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2648 << ReleaseAtCycle << "x" << Factor << "u\n");
2649
2650 // Update Executed resources counts.
2651 incExecutedResources(PIdx, Count);
2652 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2653 Rem->RemainingCounts[PIdx] -= Count;
2654
2655 // Check if this resource exceeds the current critical resource. If so, it
2656 // becomes the critical resource.
2657 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2658 ZoneCritResIdx = PIdx;
2659 LLVM_DEBUG(dbgs() << " *** Critical resource "
2660 << SchedModel->getResourceName(PIdx) << ": "
2662 << "c\n");
2663 }
2664 // For reserved resources, record the highest cycle using the resource.
2665 unsigned NextAvailable, InstanceIdx;
2666 std::tie(NextAvailable, InstanceIdx) =
2667 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2668 if (NextAvailable > CurrCycle) {
2669 LLVM_DEBUG(dbgs() << " Resource conflict: "
2670 << SchedModel->getResourceName(PIdx)
2671 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2672 << " reserved until @" << NextAvailable << "\n");
2673 }
2674 return NextAvailable;
2675}
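// Worked example (hypothetical numbers): a resource whose getResourceFactor()
// is 3, used with ReleaseAtCycle=4 and AcquireAtCycle=0, adds Count = 3 * 4 =
// 12 scaled units to ExecutedResCounts. If that pushes getResourceCount(PIdx)
// past getCriticalCount(), PIdx becomes the zone's critical resource.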
2676
2677/// Move the boundary of scheduled code by one SUnit.
2679 // Update the reservation table.
2680 if (HazardRec->isEnabled()) {
2681 if (!isTop() && SU->isCall) {
2682 // Calls are scheduled with their preceding instructions. For bottom-up
2683 // scheduling, clear the pipeline state before emitting.
2684 HazardRec->Reset();
2685 }
2686 HazardRec->EmitInstruction(SU);
2687 // Scheduling an instruction may have made pending instructions available.
2688 CheckPending = true;
2689 }
2690 // checkHazard should prevent scheduling multiple instructions per cycle that
2691 // exceed the issue width.
2692 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2693 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2694 assert(
2695 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2696 "Cannot schedule this instruction's MicroOps in the current cycle.");
2697
2698 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2699 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2700
2701 unsigned NextCycle = CurrCycle;
2702 switch (SchedModel->getMicroOpBufferSize()) {
2703 case 0:
2704 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2705 break;
2706 case 1:
2707 if (ReadyCycle > NextCycle) {
2708 NextCycle = ReadyCycle;
2709 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2710 }
2711 break;
2712 default:
2713 // We don't currently model the OOO reorder buffer, so consider all
2714 // scheduled MOps to be "retired". We do loosely model in-order resource
2715 // latency. If this instruction uses an in-order resource, account for any
2716 // likely stall cycles.
2717 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2718 NextCycle = ReadyCycle;
2719 break;
2720 }
2721 RetiredMOps += IncMOps;
2722
2723 // Update resource counts and critical resource.
2725 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2726 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2727 Rem->RemIssueCount -= DecRemIssue;
2728 if (ZoneCritResIdx) {
2729 // Scale scheduled micro-ops for comparing with the critical resource.
2730 unsigned ScaledMOps =
2731 RetiredMOps * SchedModel->getMicroOpFactor();
2732
2733 // If scaled micro-ops are now more than the previous critical resource by
2734 // a full cycle, then micro-ops issue becomes critical.
2735 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2736 >= (int)SchedModel->getLatencyFactor()) {
2737 ZoneCritResIdx = 0;
2738 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2739 << ScaledMOps / SchedModel->getLatencyFactor()
2740 << "c\n");
2741 }
2742 }
2745 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2746 unsigned RCycle =
2747 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2748 PI->AcquireAtCycle);
2749 if (RCycle > NextCycle)
2750 NextCycle = RCycle;
2751 }
2752 if (SU->hasReservedResource) {
2753 // For reserved resources, record the highest cycle using the resource.
2754 // For top-down scheduling, this is the cycle in which we schedule this
2755 // instruction plus the number of cycles the operation reserves the
2756 // resource. For bottom-up, it is simply the instruction's cycle.
2759 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2760 unsigned PIdx = PI->ProcResourceIdx;
2761 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2762
2764 unsigned ReservedUntil, InstanceIdx;
2765 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2766 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2767 if (isTop()) {
2768 ReservedResourceSegments[InstanceIdx].add(
2770 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2772 } else {
2773 ReservedResourceSegments[InstanceIdx].add(
2775 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2777 }
2778 } else {
2779
2780 unsigned ReservedUntil, InstanceIdx;
2781 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2782 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2783 if (isTop()) {
2784 ReservedCycles[InstanceIdx] =
2785 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2786 } else
2787 ReservedCycles[InstanceIdx] = NextCycle;
2788 }
2789 }
2790 }
2791 }
2792 }
2793 // Update ExpectedLatency and DependentLatency.
2794 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2795 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2796 if (SU->getDepth() > TopLatency) {
2797 TopLatency = SU->getDepth();
2798 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2799 << SU->NodeNum << ") " << TopLatency << "c\n");
2800 }
2801 if (SU->getHeight() > BotLatency) {
2802 BotLatency = SU->getHeight();
2803 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2804 << SU->NodeNum << ") " << BotLatency << "c\n");
2805 }
2806 // If we stall for any reason, bump the cycle.
2807 if (NextCycle > CurrCycle)
2808 bumpCycle(NextCycle);
2809 else
2810 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2811 // resource limited. If a stall occurred, bumpCycle does this.
2812 IsResourceLimited =
2814 getScheduledLatency(), true);
2815
2816 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2817 // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2818 // one cycle. Since we commonly reach the max MOps here, opportunistically
2819 // bump the cycle to avoid uselessly checking everything in the readyQ.
2820 CurrMOps += IncMOps;
2821
2822 // Bump the cycle count for issue group constraints.
2823 // This must be done after NextCycle has been adjusted for all other stalls.
2824 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2825 // currCycle to X.
2826 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2827 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2828 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2829 << " group\n");
2830 bumpCycle(++NextCycle);
2831 }
2832
2833 while (CurrMOps >= SchedModel->getIssueWidth()) {
2834 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2835 << CurrCycle << '\n');
2836 bumpCycle(++NextCycle);
2837 }
2839}
2840
2841/// Release pending ready nodes into the available queue. This makes them
2842/// visible to heuristics.
2844 // If the available queue is empty, it is safe to reset MinReadyCycle.
2845 if (Available.empty())
2846 MinReadyCycle = std::numeric_limits<unsigned>::max();
2847
2848 // Check to see if any of the pending instructions are ready to issue. If
2849 // so, add them to the available queue.
2850 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2851 SUnit *SU = *(Pending.begin() + I);
2852 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2853
2854 if (ReadyCycle < MinReadyCycle)
2855 MinReadyCycle = ReadyCycle;
2856
2857 if (Available.size() >= ReadyListLimit)
2858 break;
2859
2860 releaseNode(SU, ReadyCycle, true, I);
2861 if (E != Pending.size()) {
2862 --I;
2863 --E;
2864 }
2865 }
2866 CheckPending = false;
2867}
2868
2869/// Remove SU from the ready set for this boundary.
2871 if (Available.isInQueue(SU))
2873 else {
2874 assert(Pending.isInQueue(SU) && "bad ready count");
2876 }
2877}
2878
2879/// If this queue only has one ready candidate, return it. As a side effect,
2880/// defer any nodes that now hit a hazard, and advance the cycle until at least
2881/// one node is ready. If multiple instructions are ready, return NULL.
2883 if (CheckPending)
2885
2886 // Defer any ready instrs that now have a hazard.
2888 if (checkHazard(*I)) {
2889 Pending.push(*I);
2890 I = Available.remove(I);
2891 continue;
2892 }
2893 ++I;
2894 }
2895 for (unsigned i = 0; Available.empty(); ++i) {
2896// FIXME: Re-enable assert once PR20057 is resolved.
2897// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2898// "permanent hazard");
2899 (void)i;
2900 bumpCycle(CurrCycle + 1);
2902 }
2903
2906
2907 if (Available.size() == 1)
2908 return *Available.begin();
2909 return nullptr;
2910}
2911
2912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2913
2914/// Dump the content of the \ref ReservedCycles vector for the
2915/// resources that are used in the basic block.
2916///
2919 return;
2920
2921 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2922 unsigned StartIdx = 0;
2923
2924 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2925 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2926 std::string ResName = SchedModel->getResourceName(ResIdx);
2927 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2928 dbgs() << ResName << "(" << UnitIdx << ") = ";
2930 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2931 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2932 else
2933 dbgs() << "{ }\n";
2934 } else
2935 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2936 }
2937 StartIdx += NumUnits;
2938 }
2939}
2940
2941// This is useful information to dump after bumpNode.
2942// Note that the Queue contents are more useful before pickNodeFromQueue.
2944 unsigned ResFactor;
2945 unsigned ResCount;
2946 if (ZoneCritResIdx) {
2947 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2948 ResCount = getResourceCount(ZoneCritResIdx);
2949 } else {
2950 ResFactor = SchedModel->getMicroOpFactor();
2951 ResCount = RetiredMOps * ResFactor;
2952 }
2953 unsigned LFactor = SchedModel->getLatencyFactor();
2954 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2955 << " Retired: " << RetiredMOps;
2956 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2957 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2958 << ResCount / ResFactor << " "
2959 << SchedModel->getResourceName(ZoneCritResIdx)
2960 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2961 << (IsResourceLimited ? " - Resource" : " - Latency")
2962 << " limited.\n";
2965}
2966#endif
2967
2968//===----------------------------------------------------------------------===//
2969// GenericScheduler - Generic implementation of MachineSchedStrategy.
2970//===----------------------------------------------------------------------===//
2971
2974 const TargetSchedModel *SchedModel) {
2976 return;
2977
2978 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2981 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2982 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2983 ResDelta.CritResources += PI->ReleaseAtCycle;
2984 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2985 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2986 }
2987}
2988
2989/// Compute remaining latency. We need this both to determine whether the
2990/// overall schedule has become latency-limited and whether the instructions
2991/// outside this zone are resource or latency limited.
2992///
2993/// The "dependent" latency is updated incrementally during scheduling as the
2994/// max height/depth of scheduled nodes minus the cycles since it was
2995/// scheduled:
2996/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2997///
2998/// The "independent" latency is the max ready queue depth:
2999/// ILat = max N.depth for N in Available|Pending
3000///
3001/// RemainingLatency is the greater of independent and dependent latency.
3002///
3003/// These computations are expensive, especially in DAGs with many edges, so
3004/// only do them if necessary.
3005static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3006 unsigned RemLatency = CurrZone.getDependentLatency();
3007 RemLatency = std::max(RemLatency,
3008 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3009 RemLatency = std::max(RemLatency,
3010 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3011 return RemLatency;
3012}
3013
3014/// Returns true if the current cycle plus remaining latency is greater than
3015/// the critical path in the scheduling region.
3016bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3017 SchedBoundary &CurrZone,
3018 bool ComputeRemLatency,
3019 unsigned &RemLatency) const {
3020 // The current cycle is already greater than the critical path, so we are
3021 // already latency limited and don't need to compute the remaining latency.
3022 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3023 return true;
3024
3025 // If we haven't scheduled anything yet, then we aren't latency limited.
3026 if (CurrZone.getCurrCycle() == 0)
3027 return false;
3028
3029 if (ComputeRemLatency)
3030 RemLatency = computeRemLatency(CurrZone);
3031
3032 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3033}
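// Worked example (hypothetical numbers): with CurrCycle=15, CriticalPath=40
// and a computed RemLatency of 30, 30 + 15 > 40, so the policy should start
// reducing latency even though the zone itself has not yet reached the
// critical path length.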
3034
3035/// Set the CandPolicy for a scheduling zone, given the current resources and
3036/// latencies inside and outside the zone.
3037void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3038 SchedBoundary &CurrZone,
3039 SchedBoundary *OtherZone) {
3040 // Apply preemptive heuristics based on the total latency and resources
3041 // inside and outside this zone. Potential stalls should be considered before
3042 // following this policy.
3043
3044 // Compute the critical resource outside the zone.
3045 unsigned OtherCritIdx = 0;
3046 unsigned OtherCount =
3047 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3048
3049 bool OtherResLimited = false;
3050 unsigned RemLatency = 0;
3051 bool RemLatencyComputed = false;
3052 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3053 RemLatency = computeRemLatency(CurrZone);
3054 RemLatencyComputed = true;
3055 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3056 OtherCount, RemLatency, false);
3057 }
3058
3059 // Schedule aggressively for latency in PostRA mode. We don't check for
3060 // acyclic latency during PostRA, and highly out-of-order processors will
3061 // skip PostRA scheduling.
3062 if (!OtherResLimited &&
3063 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3064 RemLatency))) {
3065 Policy.ReduceLatency |= true;
3066 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3067 << " RemainingLatency " << RemLatency << " + "
3068 << CurrZone.getCurrCycle() << "c > CritPath "
3069 << Rem.CriticalPath << "\n");
3070 }
3071 // If the same resource is limiting inside and outside the zone, do nothing.
3072 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3073 return;
3074
3075 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3076 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3077 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3078 } if (OtherResLimited) dbgs()
3079 << " RemainingLimit: "
3080 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3081 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3082 << " Latency limited both directions.\n");
3083
3084 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3085 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3086
3087 if (OtherResLimited)
3088 Policy.DemandResIdx = OtherCritIdx;
3089}
3090
3091#ifndef NDEBUG
3094 switch (Reason) {
3095 case NoCand: return "NOCAND ";
3096 case Only1: return "ONLY1 ";
3097 case PhysReg: return "PHYS-REG ";
3098 case RegExcess: return "REG-EXCESS";
3099 case RegCritical: return "REG-CRIT ";
3100 case Stall: return "STALL ";
3101 case Cluster: return "CLUSTER ";
3102 case Weak: return "WEAK ";
3103 case RegMax: return "REG-MAX ";
3104 case ResourceReduce: return "RES-REDUCE";
3105 case ResourceDemand: return "RES-DEMAND";
3106 case TopDepthReduce: return "TOP-DEPTH ";
3107 case TopPathReduce: return "TOP-PATH ";
3108 case BotHeightReduce:return "BOT-HEIGHT";
3109 case BotPathReduce: return "BOT-PATH ";
3110 case NextDefUse: return "DEF-USE ";
3111 case NodeOrder: return "ORDER ";
3112 };
3113 llvm_unreachable("Unknown reason!");
3114}
3115
3118 unsigned ResIdx = 0;
3119 unsigned Latency = 0;
3120 switch (Cand.Reason) {
3121 default:
3122 break;
3123 case RegExcess:
3124 P = Cand.RPDelta.Excess;
3125 break;
3126 case RegCritical:
3127 P = Cand.RPDelta.CriticalMax;
3128 break;
3129 case RegMax:
3130 P = Cand.RPDelta.CurrentMax;
3131 break;
3132 case ResourceReduce:
3133 ResIdx = Cand.Policy.ReduceResIdx;
3134 break;
3135 case ResourceDemand:
3136 ResIdx = Cand.Policy.DemandResIdx;
3137 break;
3138 case TopDepthReduce:
3139 Latency = Cand.SU->getDepth();
3140 break;
3141 case TopPathReduce:
3142 Latency = Cand.SU->getHeight();
3143 break;
3144 case BotHeightReduce:
3145 Latency = Cand.SU->getHeight();
3146 break;
3147 case BotPathReduce:
3148 Latency = Cand.SU->getDepth();
3149 break;
3150 }
3151 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3152 if (P.isValid())
3153 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3154 << ":" << P.getUnitInc() << " ";
3155 else
3156 dbgs() << " ";
3157 if (ResIdx)
3158 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3159 else
3160 dbgs() << " ";
3161 if (Latency)
3162 dbgs() << " " << Latency << " cycles ";
3163 else
3164 dbgs() << " ";
3165 dbgs() << '\n';
3166}
3167#endif
3168
3169namespace llvm {
3170/// Return true if this heuristic determines order.
3171/// TODO: Consider refactoring the return type of these functions to an integer
3172/// or enum, as we may need to differentiate whether TryCand is better than Cand.
3173bool tryLess(int TryVal, int CandVal,
3177 if (TryVal < CandVal) {
3178 TryCand.Reason = Reason;
3179 return true;
3180 }
3181 if (TryVal > CandVal) {
3182 if (Cand.Reason > Reason)
3183 Cand.Reason = Reason;
3184 return true;
3185 }
3186 return false;
3187}
3188
3189bool tryGreater(int TryVal, int CandVal,
3193 if (TryVal > CandVal) {
3194 TryCand.Reason = Reason;
3195 return true;
3196 }
3197 if (TryVal < CandVal) {
3198 if (Cand.Reason > Reason)
3199 Cand.Reason = Reason;
3200 return true;
3201 }
3202 return false;
3203}
3204
3207 SchedBoundary &Zone) {
3208 if (Zone.isTop()) {
3209 // Prefer the candidate with the lesser depth, but only if one of them has
3210 // depth greater than the total latency scheduled so far, otherwise either
3211 // of them could be scheduled now with no stall.
3212 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3213 Zone.getScheduledLatency()) {
3214 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3216 return true;
3217 }
3218 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3220 return true;
3221 } else {
3222 // Prefer the candidate with the lesser height, but only if one of them has
3223 // height greater than the total latency scheduled so far, otherwise either
3224 // of them could be scheduled now with no stall.
3225 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3226 Zone.getScheduledLatency()) {
3227 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3229 return true;
3230 }
3231 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3233 return true;
3234 }
3235 return false;
3236}
3237} // end namespace llvm
3238
3239static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3240 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3241 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3242}
3243
3245 tracePick(Cand.Reason, Cand.AtTop);
3246}
3247
3249 assert(dag->hasVRegLiveness() &&
3250 "(PreRA)GenericScheduler needs vreg liveness");
3251 DAG = static_cast<ScheduleDAGMILive*>(dag);
3252 SchedModel = DAG->getSchedModel();
3253 TRI = DAG->TRI;
3254
3255 if (RegionPolicy.ComputeDFSResult)
3256 DAG->computeDFSResult();
3257
3258 Rem.init(DAG, SchedModel);
3259 Top.init(DAG, SchedModel, &Rem);
3260 Bot.init(DAG, SchedModel, &Rem);
3261
3262 // Initialize resource counts.
3263
3264 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3265 // are disabled, then these HazardRecs will be disabled.
3267 if (!Top.HazardRec) {
3268 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3269 }
3270 if (!Bot.HazardRec) {
3271 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3272 }
3273 TopCand.SU = nullptr;
3274 BotCand.SU = nullptr;
3275}
3276
3277/// Initialize the per-region scheduling policy.
3280 unsigned NumRegionInstrs) {
3281 const MachineFunction &MF = *Begin->getMF();
3282 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3283
3284 // Avoid setting up the register pressure tracker for small regions to save
3285 // compile time. As a rough heuristic, only track pressure when the number of
3286 // schedulable instructions exceeds half the allocatable registers of the
3287 // largest legal integer register type.
3288 RegionPolicy.ShouldTrackPressure = true;
3289 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3291 if (TLI->isTypeLegal(LegalIntVT)) {
3292 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3293 TLI->getRegClassFor(LegalIntVT));
3294 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3295 break;
3296 }
3297 }
3298
3299 // For generic targets, we default to bottom-up, because it's simpler and more
3300 // compile-time optimizations have been implemented in that direction.
3301 RegionPolicy.OnlyBottomUp = true;
3302
3303 // Allow the subtarget to override default policy.
3304 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3305
3306 // After subtarget overrides, apply command line options.
3307 if (!EnableRegPressure) {
3308 RegionPolicy.ShouldTrackPressure = false;
3309 RegionPolicy.ShouldTrackLaneMasks = false;
3310 }
3311
3312 // Check whether -misched-topdown/bottomup can force or unforce the scheduling
3313 // direction. E.g. -misched-bottomup=false allows scheduling in both directions.
3314 assert(!(ForceTopDown && ForceBottomUp) &&
3315 "-misched-topdown incompatible with -misched-bottomup");
3316 if (ForceBottomUp.getNumOccurrences() > 0) {
3317 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3318 if (RegionPolicy.OnlyBottomUp)
3319 RegionPolicy.OnlyTopDown = false;
3320 }
3321 if (ForceTopDown.getNumOccurrences() > 0) {
3322 RegionPolicy.OnlyTopDown = ForceTopDown;
3323 if (RegionPolicy.OnlyTopDown)
3324 RegionPolicy.OnlyBottomUp = false;
3325 }
3326}
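// Illustrative sketch of the subtarget override point used above; the target
// class is hypothetical, while overrideSchedPolicy() and the MachineSchedPolicy
// fields are the interfaces assumed here.
//
//   void MyTargetSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
//                                               unsigned NumRegionInstrs) const {
//     // Allow bidirectional scheduling instead of the bottom-up default.
//     Policy.OnlyBottomUp = false;
//     Policy.OnlyTopDown = false;
//     // Track register pressure only in reasonably large regions.
//     Policy.ShouldTrackPressure = NumRegionInstrs > 32;
//   }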
3327
3329 // Cannot completely remove virtual function even in release mode.
3330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3331 dbgs() << "GenericScheduler RegionPolicy: "
3332 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3333 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3334 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3335 << "\n";
3336#endif
3337}
3338
3339/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3340/// critical path by more cycles than it takes to drain the instruction buffer.
3341/// We estimate an upper bound on in-flight instructions as:
3342///
3343/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3344/// InFlightIterations = AcyclicPath / CyclesPerIteration
3345/// InFlightResources = InFlightIterations * LoopResources
3346///
3347/// TODO: Check execution resources in addition to IssueCount.
3350 return;
3351
3352 // Scaled number of cycles per loop iteration.
3353 unsigned IterCount =
3356 // Scaled acyclic critical path.
3357 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3358 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3359 unsigned InFlightCount =
3360 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3361 unsigned BufferLimit =
3363
3364 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3365
3366 LLVM_DEBUG(
3367 dbgs() << "IssueCycles="
3369 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3370 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3371 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3372 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3373 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3374}
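// Worked example (hypothetical numbers): with a cyclic critical path of 10
// cycles, loop resources equivalent to 8 issue cycles, an acyclic critical
// path of 40 cycles and a micro-op buffer of 16, roughly
// InFlightIterations = 40 / max(10, 8) = 4 iterations can be in flight. If
// those iterations carry more micro-ops than the buffer can hold, the loop is
// marked acyclic-latency limited and the scheduler prioritizes latency.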
3375
3378
3379 // Some roots may not feed into ExitSU. Check all of them in case.
3380 for (const SUnit *SU : Bot.Available) {
3381 if (SU->getDepth() > Rem.CriticalPath)
3382 Rem.CriticalPath = SU->getDepth();
3383 }
3384 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3386 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3387 }
3388
3390 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3391 checkAcyclicLatency();
3392 }
3393}
3394
3395namespace llvm {
3397 const PressureChange &CandP,
3401 const TargetRegisterInfo *TRI,
3402 const MachineFunction &MF) {
3403 // If one candidate decreases and the other increases, go with it.
3404 // Invalid candidates have UnitInc==0.
3405 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3406 Reason)) {
3407 return true;
3408 }
3409 // Do not compare the magnitude of pressure changes between top and bottom
3410 // boundary.
3411 if (Cand.AtTop != TryCand.AtTop)
3412 return false;
3413
3414 // If both candidates affect the same set in the same boundary, go with the
3415 // smallest increase.
3416 unsigned TryPSet = TryP.getPSetOrMax();
3417 unsigned CandPSet = CandP.getPSetOrMax();
3418 if (TryPSet == CandPSet) {
3419 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3420 Reason);
3421 }
3422
3423 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3424 std::numeric_limits<int>::max();
3425
3426 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3427 std::numeric_limits<int>::max();
3428
3429 // If the candidates are decreasing pressure, reverse priority.
3430 if (TryP.getUnitInc() < 0)
3431 std::swap(TryRank, CandRank);
3432 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3433}
3434
3435unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3436 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3437}
3438
3439/// Minimize physical register live ranges. Regalloc wants them adjacent to
3440/// their physreg def/use.
3441///
3442/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3443/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3444/// with the operation that produces or consumes the physreg. We'll do this when
3445/// regalloc has support for parallel copies.
3446int biasPhysReg(const SUnit *SU, bool isTop) {
3447 const MachineInstr *MI = SU->getInstr();
3448
3449 if (MI->isCopy()) {
3450 unsigned ScheduledOper = isTop ? 1 : 0;
3451 unsigned UnscheduledOper = isTop ? 0 : 1;
3452 // If we have already scheduled the physreg producer/consumer, immediately
3453 // schedule the copy.
3454 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3455 return 1;
3456 // If the physreg is at the boundary, defer it. Otherwise schedule it
3457 // immediately to free the dependent. We can hoist the copy later.
3458 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3459 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3460 return AtBoundary ? -1 : 1;
3461 }
3462
3463 if (MI->isMoveImmediate()) {
3464 // If we have a move immediate and all successors have been assigned, bias
3465 // towards scheduling this later. Make sure all register defs are to
3466 // physical registers.
3467 bool DoBias = true;
3468 for (const MachineOperand &Op : MI->defs()) {
3469 if (Op.isReg() && !Op.getReg().isPhysical()) {
3470 DoBias = false;
3471 break;
3472 }
3473 }
3474
3475 if (DoBias)
3476 return isTop ? -1 : 1;
3477 }
3478
3479 return 0;
3480}
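// Worked example: in a bottom-up zone (isTop == false), a COPY that defines a
// physical register (operand 0) returns 1, pulling the copy next to the
// already-scheduled consumer of that physreg. If instead the source (operand
// 1) is the physreg and the copy has no unscheduled predecessors left (it sits
// at the region boundary), the function returns -1 and the copy is deferred.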
3481} // end namespace llvm
3482
3484 bool AtTop,
3485 const RegPressureTracker &RPTracker,
3486 RegPressureTracker &TempTracker) {
3487 Cand.SU = SU;
3488 Cand.AtTop = AtTop;
3489 if (DAG->isTrackingPressure()) {
3490 if (AtTop) {
3491 TempTracker.getMaxDownwardPressureDelta(
3492 Cand.SU->getInstr(),
3493 Cand.RPDelta,
3494 DAG->getRegionCriticalPSets(),
3495 DAG->getRegPressure().MaxSetPressure);
3496 } else {
3497 if (VerifyScheduling) {
3498 TempTracker.getMaxUpwardPressureDelta(
3499 Cand.SU->getInstr(),
3500 &DAG->getPressureDiff(Cand.SU),
3501 Cand.RPDelta,
3502 DAG->getRegionCriticalPSets(),
3503 DAG->getRegPressure().MaxSetPressure);
3504 } else {
3505 RPTracker.getUpwardPressureDelta(
3506 Cand.SU->getInstr(),
3507 DAG->getPressureDiff(Cand.SU),
3508 Cand.RPDelta,
3509 DAG->getRegionCriticalPSets(),
3510 DAG->getRegPressure().MaxSetPressure);
3511 }
3512 }
3513 }
3514 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3515 << " Try SU(" << Cand.SU->NodeNum << ") "
3516 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3517 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3518}
3519
3520/// Apply a set of heuristics to a new candidate. Heuristics are currently
3521/// hierarchical. This may be more efficient than a graduated cost model because
3522/// we don't need to evaluate all aspects of the model for each node in the
3523/// queue. But it's really done to make the heuristics easier to debug and
3524/// statistically analyze.
3525///
3526/// \param Cand provides the policy and current best candidate.
3527/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3528/// \param Zone describes the scheduled zone that we are extending, or nullptr
3529/// if Cand is from a different zone than TryCand.
3530/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3531bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3532 SchedCandidate &TryCand,
3533 SchedBoundary *Zone) const {
3534 // Initialize the candidate if needed.
3535 if (!Cand.isValid()) {
3536 TryCand.Reason = NodeOrder;
3537 return true;
3538 }
3539
3540 // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3541 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3542 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3543 return TryCand.Reason != NoCand;
3544
3545 // Avoid exceeding the target's limit.
3546 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3547 Cand.RPDelta.Excess,
3548 TryCand, Cand, RegExcess, TRI,
3549 DAG->MF))
3550 return TryCand.Reason != NoCand;
3551
3552 // Avoid increasing the max critical pressure in the scheduled region.
3553 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3554 Cand.RPDelta.CriticalMax,
3555 TryCand, Cand, RegCritical, TRI,
3556 DAG->MF))
3557 return TryCand.Reason != NoCand;
3558
3559 // We only compare a subset of features when comparing nodes between the
3560 // Top and Bottom boundaries. Some properties are simply incomparable; in many
3561 // other instances we should only override the other boundary if something
3562 // is a clearly good pick on one boundary. Skip heuristics that are more
3563 // "tie-breaking" in nature.
3564 bool SameBoundary = Zone != nullptr;
3565 if (SameBoundary) {
3566 // For loops that are acyclic path limited, aggressively schedule for
3567 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3568 // heuristics to take precedence.
3569 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3570 tryLatency(TryCand, Cand, *Zone))
3571 return TryCand.Reason != NoCand;
3572
3573 // Prioritize instructions that read unbuffered resources by stall cycles.
3574 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3575 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3576 return TryCand.Reason != NoCand;
3577 }
3578
3579 // Keep clustered nodes together to encourage downstream peephole
3580 // optimizations which may reduce resource requirements.
3581 //
3582 // This is a best effort to set things up for a post-RA pass. Optimizations
3583 // like generating loads of multiple registers should ideally be done within
3584 // the scheduler pass by combining the loads during DAG postprocessing.
3585 const SUnit *CandNextClusterSU =
3586 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3587 const SUnit *TryCandNextClusterSU =
3588 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3589 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3590 Cand.SU == CandNextClusterSU,
3591 TryCand, Cand, Cluster))
3592 return TryCand.Reason != NoCand;
3593
3594 if (SameBoundary) {
3595 // Weak edges are for clustering and other constraints.
3596 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3597 getWeakLeft(Cand.SU, Cand.AtTop),
3598 TryCand, Cand, Weak))
3599 return TryCand.Reason != NoCand;
3600 }
3601
3602 // Avoid increasing the max pressure of the entire region.
3603 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3604 Cand.RPDelta.CurrentMax,
3605 TryCand, Cand, RegMax, TRI,
3606 DAG->MF))
3607 return TryCand.Reason != NoCand;
3608
3609 if (SameBoundary) {
3610 // Avoid critical resource consumption and balance the schedule.
3611 TryCand.initResourceDelta(DAG, SchedModel);
3612 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3613 TryCand, Cand, ResourceReduce))
3614 return TryCand.Reason != NoCand;
3615 if (tryGreater(TryCand.ResDelta.DemandedResources,
3616 Cand.ResDelta.DemandedResources,
3617 TryCand, Cand, ResourceDemand))
3618 return TryCand.Reason != NoCand;
3619
3620 // Avoid serializing long latency dependence chains.
3621 // For acyclic path limited loops, latency was already checked above.
3622 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3623 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3624 return TryCand.Reason != NoCand;
3625
3626 // Fall through to original instruction order.
3627 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3628 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3629 TryCand.Reason = NodeOrder;
3630 return true;
3631 }
3632 }
3633
3634 return false;
3635}
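// Editorial illustration (standalone sketch, not part of the upstream file):
// the selection pattern used by tryCandidate() above, reduced to plain C++.
// Heuristics run in priority order; the first one that can separate the two
// candidates decides, and later tie-breakers never run. All names below are
// invented for illustration and omit the zone/boundary handling.
#include <functional>
#include <optional>
#include <vector>

struct CandSketch { int Excess; int Latency; int NodeNum; };

// Returns true if Try beats Best, false if it loses, std::nullopt if this
// heuristic cannot tell them apart.
using HeuristicSketch =
    std::function<std::optional<bool>(const CandSketch &, const CandSketch &)>;

inline bool tryCandidateSketch(const CandSketch &Best, const CandSketch &Try) {
  const std::vector<HeuristicSketch> Order = {
      // 1. Prefer the smaller register-pressure excess.
      [](const CandSketch &B, const CandSketch &T) -> std::optional<bool> {
        if (T.Excess != B.Excess) return T.Excess < B.Excess;
        return std::nullopt;
      },
      // 2. Then prefer the shorter critical latency.
      [](const CandSketch &B, const CandSketch &T) -> std::optional<bool> {
        if (T.Latency != B.Latency) return T.Latency < B.Latency;
        return std::nullopt;
      },
  };
  for (const HeuristicSketch &H : Order)
    if (std::optional<bool> Decision = H(Best, Try))
      return *Decision;
  // 3. Fall through to original instruction order, as above.
  return Try.NodeNum < Best.NodeNum;
}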
3636
3637/// Pick the best candidate from the queue.
3638///
3639/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3640/// DAG building. To adjust for the current scheduling location we need to
3641/// maintain the number of vreg uses remaining to be top-scheduled.
3642void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3643 const CandPolicy &ZonePolicy,
3644 const RegPressureTracker &RPTracker,
3645 SchedCandidate &Cand) {
3646 // getMaxPressureDelta temporarily modifies the tracker.
3647 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3648
3649 ReadyQueue &Q = Zone.Available;
3650 for (SUnit *SU : Q) {
3651
3652 SchedCandidate TryCand(ZonePolicy);
3653 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3654 // Pass SchedBoundary only when comparing nodes from the same boundary.
3655 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3656 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3657 // Initialize resource delta if needed in case future heuristics query it.
3658 if (TryCand.ResDelta == SchedResourceDelta())
3659 TryCand.initResourceDelta(DAG, SchedModel);
3660 Cand.setBest(TryCand);
3662 }
3663 }
3664}
3665
3666/// Pick the best candidate node from either the top or bottom queue.
3667SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3668 // Schedule as far as possible in the direction of no choice. This is most
3669 // efficient, but also provides the best heuristics for CriticalPSets.
3670 if (SUnit *SU = Bot.pickOnlyChoice()) {
3671 IsTopNode = false;
3672 tracePick(Only1, false);
3673 return SU;
3674 }
3675 if (SUnit *SU = Top.pickOnlyChoice()) {
3676 IsTopNode = true;
3677 tracePick(Only1, true);
3678 return SU;
3679 }
3680 // Set the bottom-up policy based on the state of the current bottom zone and
3681 // the instructions outside the zone, including the top zone.
3682 CandPolicy BotPolicy;
3683 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3684 // Set the top-down policy based on the state of the current top zone and
3685 // the instructions outside the zone, including the bottom zone.
3686 CandPolicy TopPolicy;
3687 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3688
3689 // See if BotCand is still valid (because we previously scheduled from Top).
3690 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3691 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3692 BotCand.Policy != BotPolicy) {
3693 BotCand.reset(CandPolicy());
3694 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3695 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3696 } else {
3697 LLVM_DEBUG(traceCandidate(BotCand));
3698#ifndef NDEBUG
3699 if (VerifyScheduling) {
3700 SchedCandidate TCand;
3701 TCand.reset(CandPolicy());
3702 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3703 assert(TCand.SU == BotCand.SU &&
3704 "Last pick result should correspond to re-picking right now");
3705 }
3706#endif
3707 }
3708
3709 // Check if the top Q has a better candidate.
3710 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3711 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3712 TopCand.Policy != TopPolicy) {
3713 TopCand.reset(CandPolicy());
3714 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3715 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3716 } else {
3717 LLVM_DEBUG(traceCandidate(TopCand));
3718#ifndef NDEBUG
3719 if (VerifyScheduling) {
3720 SchedCandidate TCand;
3721 TCand.reset(CandPolicy());
3722 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3723 assert(TCand.SU == TopCand.SU &&
3724 "Last pick result should correspond to re-picking right now");
3725 }
3726#endif
3727 }
3728
3729 // Pick best from BotCand and TopCand.
3730 assert(BotCand.isValid());
3731 assert(TopCand.isValid());
3732 SchedCandidate Cand = BotCand;
3733 TopCand.Reason = NoCand;
3734 if (tryCandidate(Cand, TopCand, nullptr)) {
3735 Cand.setBest(TopCand);
3737 }
3738
3739 IsTopNode = Cand.AtTop;
3740 tracePick(Cand);
3741 return Cand.SU;
3742}
3743
3744/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3745SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3746 if (DAG->top() == DAG->bottom()) {
3747 assert(Top.Available.empty() && Top.Pending.empty() &&
3748 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3749 return nullptr;
3750 }
3751 SUnit *SU;
3752 do {
3753 if (RegionPolicy.OnlyTopDown) {
3754 SU = Top.pickOnlyChoice();
3755 if (!SU) {
3756 CandPolicy NoPolicy;
3757 TopCand.reset(NoPolicy);
3758 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3759 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3760 tracePick(TopCand);
3761 SU = TopCand.SU;
3762 }
3763 IsTopNode = true;
3764 } else if (RegionPolicy.OnlyBottomUp) {
3765 SU = Bot.pickOnlyChoice();
3766 if (!SU) {
3767 CandPolicy NoPolicy;
3768 BotCand.reset(NoPolicy);
3769 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3770 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3771 tracePick(BotCand);
3772 SU = BotCand.SU;
3773 }
3774 IsTopNode = false;
3775 } else {
3776 SU = pickNodeBidirectional(IsTopNode);
3777 }
3778 } while (SU->isScheduled);
3779
3780 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
3781 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
3782 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
3783 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
3784 //
3785 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
3786 // isBottomReady. That is, it didn't factor into the decision to choose SU
3787 // because it isTopReady or isBottomReady, respectively. In fact, if the
3788 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
3789 // queues respectively contain the original roots and don't get updated when
3790 // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
3791 // because we scheduled everything but the top roots. Conversely, if SU
3792 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
3793 // but the bottom roots. If it's in a queue even coincidentally, it should be
3794 // removed so it does not get re-picked in a subsequent pickNode call.
3795 if (SU->isTopReady())
3796 Top.removeReady(SU);
3797 if (SU->isBottomReady())
3798 Bot.removeReady(SU);
3799
3800 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3801 << *SU->getInstr());
3802 return SU;
3803}
3804
3805void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3806 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3807 if (!isTop)
3808 ++InsertPos;
3809 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3810
3811 // Find already scheduled copies with a single physreg dependence and move
3812 // them just above the scheduled instruction.
3813 for (SDep &Dep : Deps) {
3814 if (Dep.getKind() != SDep::Data ||
3815 !Register::isPhysicalRegister(Dep.getReg()))
3816 continue;
3817 SUnit *DepSU = Dep.getSUnit();
3818 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3819 continue;
3820 MachineInstr *Copy = DepSU->getInstr();
3821 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3822 continue;
3823 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3824 DAG->dumpNode(*Dep.getSUnit()));
3825 DAG->moveInstruction(Copy, InsertPos);
3826 }
3827}
3828
3829/// Update the scheduler's state after scheduling a node. This is the same node
3830/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3831/// update it's state based on the current cycle before MachineSchedStrategy
3832/// update its state based on the current cycle before MachineSchedStrategy
3833///
3834/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3835/// them here. See comments in biasPhysReg.
3836void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3837 if (IsTopNode) {
3838 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3839 Top.bumpNode(SU);
3840 if (SU->hasPhysRegUses)
3841 reschedulePhysReg(SU, true);
3842 } else {
3843 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3844 Bot.bumpNode(SU);
3845 if (SU->hasPhysRegDefs)
3846 reschedulePhysReg(SU, false);
3847 }
3848}
3849
3850/// Create the standard converging machine scheduler. This will be used as the
3851/// default scheduler if the target does not set a default.
3852ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3853 ScheduleDAGMILive *DAG =
3854 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3855 // Register DAG post-processors.
3856 //
3857 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3858 // data and pass it to later mutations. Have a single mutation that gathers
3859 // the interesting nodes in one pass.
3860 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3861
3862 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3863 // Add MacroFusion mutation if fusions are not empty.
3864 const auto &MacroFusions = STI.getMacroFusions();
3865 if (!MacroFusions.empty())
3866 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3867 return DAG;
3868}
3869
3870static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3871 return createGenericSchedLive(C);
3872}
3873
3874static MachineSchedRegistry
3875GenericSchedRegistry("converge", "Standard converging scheduler.",
3876 createConvergingSched);
3877
3878//===----------------------------------------------------------------------===//
3879// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3880//===----------------------------------------------------------------------===//
3881
3882void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3883 DAG = Dag;
3884 SchedModel = DAG->getSchedModel();
3885 TRI = DAG->TRI;
3886
3887 Rem.init(DAG, SchedModel);
3888 Top.init(DAG, SchedModel, &Rem);
3889 Bot.init(DAG, SchedModel, &Rem);
3890
3891 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3892 // or are disabled, then these HazardRecs will be disabled.
3893 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3894 if (!Top.HazardRec) {
3895 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3896 }
3897 if (!Bot.HazardRec) {
3898 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3899 }
3900}
3901
3902void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3903 MachineBasicBlock::iterator End,
3904 unsigned NumRegionInstrs) {
3905 if (PostRADirection == MISchedPostRASched::TopDown) {
3906 RegionPolicy.OnlyTopDown = true;
3907 RegionPolicy.OnlyBottomUp = false;
3908 } else if (PostRADirection == MISchedPostRASched::BottomUp) {
3909 RegionPolicy.OnlyTopDown = false;
3910 RegionPolicy.OnlyBottomUp = true;
3911 } else if (PostRADirection == MISchedPostRASched::Bidirectional) {
3912 RegionPolicy.OnlyBottomUp = false;
3913 RegionPolicy.OnlyTopDown = false;
3914 }
3915}
3916
3917void PostGenericScheduler::registerRoots() {
3918 Rem.CriticalPath = DAG->ExitSU.getDepth();
3919
3920 // Some roots may not feed into ExitSU. Check all of them in case.
3921 for (const SUnit *SU : Bot.Available) {
3922 if (SU->getDepth() > Rem.CriticalPath)
3923 Rem.CriticalPath = SU->getDepth();
3924 }
3925 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3926 if (DumpCriticalPathLength) {
3927 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3928 }
3929}
3930
3931/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3932///
3933/// \param Cand provides the policy and current best candidate.
3934/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3935/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3936bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3937 SchedCandidate &TryCand) {
3938 // Initialize the candidate if needed.
3939 if (!Cand.isValid()) {
3940 TryCand.Reason = NodeOrder;
3941 return true;
3942 }
3943
3944 // Prioritize instructions that read unbuffered resources by stall cycles.
3945 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3946 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3947 return TryCand.Reason != NoCand;
3948
3949 // Keep clustered nodes together.
3950 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3951 Cand.SU == DAG->getNextClusterSucc(),
3952 TryCand, Cand, Cluster))
3953 return TryCand.Reason != NoCand;
3954
3955 // Avoid critical resource consumption and balance the schedule.
3956 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3957 TryCand, Cand, ResourceReduce))
3958 return TryCand.Reason != NoCand;
3959 if (tryGreater(TryCand.ResDelta.DemandedResources,
3960 Cand.ResDelta.DemandedResources,
3961 TryCand, Cand, ResourceDemand))
3962 return TryCand.Reason != NoCand;
3963
3964 // Avoid serializing long latency dependence chains.
3965 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3966 return TryCand.Reason != NoCand;
3967 }
3968
3969 // Fall through to original instruction order.
3970 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3971 TryCand.Reason = NodeOrder;
3972 return true;
3973 }
3974
3975 return false;
3976}
3977
3978void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3979 SchedCandidate &Cand) {
3980 ReadyQueue &Q = Zone.Available;
3981 for (SUnit *SU : Q) {
3982 SchedCandidate TryCand(Cand.Policy);
3983 TryCand.SU = SU;
3984 TryCand.AtTop = Zone.isTop();
3985 TryCand.initResourceDelta(DAG, SchedModel);
3986 if (tryCandidate(Cand, TryCand)) {
3987 Cand.setBest(TryCand);
3989 }
3990 }
3991}
3992
3993/// Pick the best candidate node from either the top or bottom queue.
3994SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3995 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
3996 // out common parts.
3997
3998 // Schedule as far as possible in the direction of no choice. This is most
3999 // efficient, but also provides the best heuristics for CriticalPSets.
4000 if (SUnit *SU = Bot.pickOnlyChoice()) {
4001 IsTopNode = false;
4002 tracePick(Only1, false);
4003 return SU;
4004 }
4005 if (SUnit *SU = Top.pickOnlyChoice()) {
4006 IsTopNode = true;
4007 tracePick(Only1, true);
4008 return SU;
4009 }
4010 // Set the bottom-up policy based on the state of the current bottom zone and
4011 // the instructions outside the zone, including the top zone.
4012 CandPolicy BotPolicy;
4013 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4014 // Set the top-down policy based on the state of the current top zone and
4015 // the instructions outside the zone, including the bottom zone.
4016 CandPolicy TopPolicy;
4017 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4018
4019 // See if BotCand is still valid (because we previously scheduled from Top).
4020 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4021 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4022 BotCand.Policy != BotPolicy) {
4023 BotCand.reset(CandPolicy());
4024 pickNodeFromQueue(Bot, BotCand);
4025 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4026 } else {
4027 LLVM_DEBUG(traceCandidate(BotCand));
4028#ifndef NDEBUG
4029 if (VerifyScheduling) {
4030 SchedCandidate TCand;
4031 TCand.reset(CandPolicy());
4032 pickNodeFromQueue(Bot, BotCand);
4033 assert(TCand.SU == BotCand.SU &&
4034 "Last pick result should correspond to re-picking right now");
4035 }
4036#endif
4037 }
4038
4039 // Check if the top Q has a better candidate.
4040 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4041 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4042 TopCand.Policy != TopPolicy) {
4043 TopCand.reset(CandPolicy());
4044 pickNodeFromQueue(Top, TopCand);
4045 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4046 } else {
4047 LLVM_DEBUG(traceCandidate(TopCand));
4048#ifndef NDEBUG
4049 if (VerifyScheduling) {
4050 SchedCandidate TCand;
4051 TCand.reset(CandPolicy());
4052 pickNodeFromQueue(Top, TopCand);
4053 assert(TCand.SU == TopCand.SU &&
4054 "Last pick result should correspond to re-picking right now");
4055 }
4056#endif
4057 }
4058
4059 // Pick best from BotCand and TopCand.
4060 assert(BotCand.isValid());
4061 assert(TopCand.isValid());
4062 SchedCandidate Cand = BotCand;
4063 TopCand.Reason = NoCand;
4064 if (tryCandidate(Cand, TopCand)) {
4065 Cand.setBest(TopCand);
4067 }
4068
4069 IsTopNode = Cand.AtTop;
4070 tracePick(Cand);
4071 return Cand.SU;
4072}
4073
4074/// Pick the next node to schedule.
4075SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4076 if (DAG->top() == DAG->bottom()) {
4077 assert(Top.Available.empty() && Top.Pending.empty() &&
4078 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4079 return nullptr;
4080 }
4081 SUnit *SU;
4082 do {
4083 if (RegionPolicy.OnlyBottomUp) {
4084 SU = Bot.pickOnlyChoice();
4085 if (SU) {
4086 tracePick(Only1, true);
4087 } else {
4088 CandPolicy NoPolicy;
4089 BotCand.reset(NoPolicy);
4090 // Set the bottom-up policy based on the state of the current bottom
4091 // zone and the instructions outside the zone, including the top zone.
4092 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4093 pickNodeFromQueue(Bot, BotCand);
4094 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4095 tracePick(BotCand);
4096 SU = BotCand.SU;
4097 }
4098 IsTopNode = false;
4099 } else if (RegionPolicy.OnlyTopDown) {
4100 SU = Top.pickOnlyChoice();
4101 if (SU) {
4102 tracePick(Only1, true);
4103 } else {
4104 CandPolicy NoPolicy;
4105 TopCand.reset(NoPolicy);
4106 // Set the top-down policy based on the state of the current top zone
4107 // and the instructions outside the zone, including the bottom zone.
4108 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4109 pickNodeFromQueue(Top, TopCand);
4110 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4111 tracePick(TopCand);
4112 SU = TopCand.SU;
4113 }
4114 IsTopNode = true;
4115 } else {
4116 SU = pickNodeBidirectional(IsTopNode);
4117 }
4118 } while (SU->isScheduled);
4119
4120 if (SU->isTopReady())
4121 Top.removeReady(SU);
4122 if (SU->isBottomReady())
4123 Bot.removeReady(SU);
4124
4125 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4126 << *SU->getInstr());
4127 return SU;
4128}
4129
4130/// Called after ScheduleDAGMI has scheduled an instruction and updated
4131/// scheduled/remaining flags in the DAG nodes.
4132void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4133 if (IsTopNode) {
4134 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4135 Top.bumpNode(SU);
4136 } else {
4137 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4138 Bot.bumpNode(SU);
4139 }
4140}
4141
4142ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4143 ScheduleDAGMI *DAG =
4144 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4145 /*RemoveKillFlags=*/true);
4146 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4147 // Add MacroFusion mutation if fusions are not empty.
4148 const auto &MacroFusions = STI.getMacroFusions();
4149 if (!MacroFusions.empty())
4150 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4151 return DAG;
4152}
4153
4154//===----------------------------------------------------------------------===//
4155// ILP Scheduler. Currently for experimental analysis of heuristics.
4156//===----------------------------------------------------------------------===//
4157
4158namespace {
4159
4160/// Order nodes by the ILP metric.
4161struct ILPOrder {
4162 const SchedDFSResult *DFSResult = nullptr;
4163 const BitVector *ScheduledTrees = nullptr;
4164 bool MaximizeILP;
4165
4166 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4167
4168 /// Apply a less-than relation on node priority.
4169 ///
4170 /// (Return true if A comes after B in the Q.)
4171 bool operator()(const SUnit *A, const SUnit *B) const {
4172 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4173 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4174 if (SchedTreeA != SchedTreeB) {
4175 // Unscheduled trees have lower priority.
4176 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4177 return ScheduledTrees->test(SchedTreeB);
4178
4179 // Trees with shallower connections have lower priority.
4180 if (DFSResult->getSubtreeLevel(SchedTreeA)
4181 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4182 return DFSResult->getSubtreeLevel(SchedTreeA)
4183 < DFSResult->getSubtreeLevel(SchedTreeB);
4184 }
4185 }
4186 if (MaximizeILP)
4187 return DFSResult->getILP(A) < DFSResult->getILP(B);
4188 else
4189 return DFSResult->getILP(A) > DFSResult->getILP(B);
4190 }
4191};
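// Editorial illustration (standalone sketch, not part of the upstream file):
// how an ILPOrder-style "A comes after B" comparator drives the ReadyQ heap in
// the scheduler below. The standard heap algorithms treat the comparator as
// less-than, so after pop_heap the element that compares greatest (here, the
// largest metric) ends up at the back, ready to be popped. Names are invented.
#include <algorithm>
#include <cassert>
#include <vector>

inline int pickBestFromHeapSketch() {
  std::vector<int> ReadyQ = {3, 9, 1};
  auto ComesAfter = [](int A, int B) { return A < B; }; // A after B in the Q.
  std::make_heap(ReadyQ.begin(), ReadyQ.end(), ComesAfter);
  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), ComesAfter);
  int Best = ReadyQ.back(); // 9: highest priority, as in pickNode() below.
  ReadyQ.pop_back();
  assert(Best == 9);
  return Best;
}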
4192
4193/// Schedule based on the ILP metric.
4194class ILPScheduler : public MachineSchedStrategy {
4195 ScheduleDAGMILive *DAG = nullptr;
4196 ILPOrder Cmp;
4197
4198 std::vector<SUnit*> ReadyQ;
4199
4200public:
4201 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4202
4203 void initialize(ScheduleDAGMI *dag) override {
4204 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4205 DAG = static_cast<ScheduleDAGMILive*>(dag);
4206 DAG->computeDFSResult();
4207 Cmp.DFSResult = DAG->getDFSResult();
4208 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4209 ReadyQ.clear();
4210 }
4211
4212 void registerRoots() override {
4213 // Restore the heap in ReadyQ with the updated DFS results.
4214 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4215 }
4216
4217 /// Implement MachineSchedStrategy interface.
4218 /// -----------------------------------------
4219
4220 /// Callback to select the highest priority node from the ready Q.
4221 SUnit *pickNode(bool &IsTopNode) override {
4222 if (ReadyQ.empty()) return nullptr;
4223 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4224 SUnit *SU = ReadyQ.back();
4225 ReadyQ.pop_back();
4226 IsTopNode = false;
4227 LLVM_DEBUG(dbgs() << "Pick node "
4228 << "SU(" << SU->NodeNum << ") "
4229 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4230 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4231 << " @"
4232 << DAG->getDFSResult()->getSubtreeLevel(
4233 DAG->getDFSResult()->getSubtreeID(SU))
4234 << '\n'
4235 << "Scheduling " << *SU->getInstr());
4236 return SU;
4237 }
4238
4239 /// Scheduler callback to notify that a new subtree is scheduled.
4240 void scheduleTree(unsigned SubtreeID) override {
4241 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4242 }
4243
4244 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4245 /// DFSResults, and resort the priority Q.
4246 void schedNode(SUnit *SU, bool IsTopNode) override {
4247 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4248 }
4249
4250 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4251
4252 void releaseBottomNode(SUnit *SU) override {
4253 ReadyQ.push_back(SU);
4254 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4255 }
4256};
4257
4258} // end anonymous namespace
4259
4260static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4261 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4262}
4263static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4264 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4265}
4266
4267static MachineSchedRegistry ILPMaxRegistry(
4268 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4269static MachineSchedRegistry ILPMinRegistry(
4270 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4271
4272//===----------------------------------------------------------------------===//
4273// Machine Instruction Shuffler for Correctness Testing
4274//===----------------------------------------------------------------------===//
4275
4276#ifndef NDEBUG
4277namespace {
4278
4279/// Apply a less-than relation on the node order, which corresponds to the
4280/// instruction order prior to scheduling. IsReverse implements greater-than.
4281template<bool IsReverse>
4282struct SUnitOrder {
4283 bool operator()(SUnit *A, SUnit *B) const {
4284 if (IsReverse)
4285 return A->NodeNum > B->NodeNum;
4286 else
4287 return A->NodeNum < B->NodeNum;
4288 }
4289};
4290
4291/// Reorder instructions as much as possible.
4292class InstructionShuffler : public MachineSchedStrategy {
4293 bool IsAlternating;
4294 bool IsTopDown;
4295
4296 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4297 // gives nodes with a higher number higher priority causing the latest
4298 // instructions to be scheduled first.
4299 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4300 TopQ;
4301
4302 // When scheduling bottom-up, use greater-than as the queue priority.
4303 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4304 BottomQ;
4305
4306public:
4307 InstructionShuffler(bool alternate, bool topdown)
4308 : IsAlternating(alternate), IsTopDown(topdown) {}
4309
4310 void initialize(ScheduleDAGMI*) override {
4311 TopQ.clear();
4312 BottomQ.clear();
4313 }
4314
4315 /// Implement MachineSchedStrategy interface.
4316 /// -----------------------------------------
4317
4318 SUnit *pickNode(bool &IsTopNode) override {
4319 SUnit *SU;
4320 if (IsTopDown) {
4321 do {
4322 if (TopQ.empty()) return nullptr;
4323 SU = TopQ.top();
4324 TopQ.pop();
4325 } while (SU->isScheduled);
4326 IsTopNode = true;
4327 } else {
4328 do {
4329 if (BottomQ.empty()) return nullptr;
4330 SU = BottomQ.top();
4331 BottomQ.pop();
4332 } while (SU->isScheduled);
4333 IsTopNode = false;
4334 }
4335 if (IsAlternating)
4336 IsTopDown = !IsTopDown;
4337 return SU;
4338 }
4339
4340 void schedNode(SUnit *SU, bool IsTopNode) override {}
4341
4342 void releaseTopNode(SUnit *SU) override {
4343 TopQ.push(SU);
4344 }
4345 void releaseBottomNode(SUnit *SU) override {
4346 BottomQ.push(SU);
4347 }
4348};
4349
4350} // end anonymous namespace
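// Editorial illustration (standalone sketch, not part of the upstream file):
// the SUnitOrder<IsReverse> trick above, shown with plain integers.
// std::priority_queue pops whatever compares greatest under the supplied
// less-than, so the <false> (plain less-than) queue pops the highest-numbered
// element first, and the <true> queue pops the lowest-numbered one first.
#include <queue>
#include <vector>

template <bool IsReverse> struct NumOrderSketch {
  bool operator()(int A, int B) const { return IsReverse ? A > B : A < B; }
};

inline int topOfTopQSketch() {
  std::priority_queue<int, std::vector<int>, NumOrderSketch<false>> TopQ;
  TopQ.push(4);
  TopQ.push(7);
  TopQ.push(2);
  return TopQ.top(); // 7: the latest node is scheduled first, as noted above.
}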
4351
4352static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4353 bool Alternate = !ForceTopDown && !ForceBottomUp;
4354 bool TopDown = !ForceBottomUp;
4355 assert((TopDown || !ForceTopDown) &&
4356 "-misched-topdown incompatible with -misched-bottomup");
4357 return new ScheduleDAGMILive(
4358 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4359}
4360
4361static MachineSchedRegistry ShufflerRegistry(
4362 "shuffle", "Shuffle machine instructions alternating directions",
4363 createInstructionShuffler);
4364#endif // !NDEBUG
4365
4366//===----------------------------------------------------------------------===//
4367// GraphWriter support for ScheduleDAGMILive.
4368//===----------------------------------------------------------------------===//
4369
4370#ifndef NDEBUG
4371namespace llvm {
4372
4373template<> struct GraphTraits<
4375
4376template<>
4379
4380 static std::string getGraphName(const ScheduleDAG *G) {
4381 return std::string(G->MF.getName());
4382 }
4383
4384 static bool renderGraphFromBottomUp() {
4385 return true;
4386 }
4387
4388 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4389 if (ViewMISchedCutoff == 0)
4390 return false;
4391 return (Node->Preds.size() > ViewMISchedCutoff
4392 || Node->Succs.size() > ViewMISchedCutoff);
4393 }
4394
4395 /// If you want to override the dot attributes printed for a particular
4396 /// edge, override this method.
4397 static std::string getEdgeAttributes(const SUnit *Node,
4398 SUnitIterator EI,
4399 const ScheduleDAG *Graph) {
4400 if (EI.isArtificialDep())
4401 return "color=cyan,style=dashed";
4402 if (EI.isCtrlDep())
4403 return "color=blue,style=dashed";
4404 return "";
4405 }
4406
4407 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4408 std::string Str;
4409 raw_string_ostream SS(Str);
4410 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4411 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4412 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4413 SS << "SU:" << SU->NodeNum;
4414 if (DFS)
4415 SS << " I:" << DFS->getNumInstrs(SU);
4416 return Str;
4417 }
4418
4419 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4420 return G->getGraphNodeLabel(SU);
4421 }
4422
4423 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4424 std::string Str("shape=Mrecord");
4425 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4426 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4427 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4428 if (DFS) {
4429 Str += ",style=filled,fillcolor=\"#";
4430 Str += DOT::getColorString(DFS->getSubtreeID(N));
4431 Str += '"';
4432 }
4433 return Str;
4434 }
4435};
4436
4437} // end namespace llvm
4438#endif // NDEBUG
4439
4440/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4441/// rendered using 'dot'.
4442void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4443#ifndef NDEBUG
4444 ViewGraph(this, Name, false, Title);
4445#else
4446 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4447 << "systems with Graphviz or gv!\n";
4448#endif // NDEBUG
4449}
4450
4451/// Out-of-line implementation with no arguments is handy for gdb.
4452void ScheduleDAGMI::viewGraph() {
4453 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4454}
4455
4456/// Sort predicate for the intervals stored in an instance of
4457/// ResourceSegments. Intervals are always disjoint (no intersection
4458/// for any pairs of intervals), therefore we can sort the totality of
4459/// the intervals by looking only at the left boundary.
4460static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4461 const ResourceSegments::IntervalTy &B) {
4462 return A.first < B.first;
4463}
4464
4465unsigned ResourceSegments::getFirstAvailableAt(
4466 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4467 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4468 IntervalBuilder) const {
4469 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4470 sortIntervals) &&
4471 "Cannot execute on an un-sorted set of intervals.");
4472
4473 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4474 // a ResourceSegment interval for that situation.
4475 if (AcquireAtCycle == ReleaseAtCycle)
4476 return CurrCycle;
4477
4478 unsigned RetCycle = CurrCycle;
4479 ResourceSegments::IntervalTy NewInterval =
4480 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4481 for (auto &Interval : _Intervals) {
4482 if (!intersects(NewInterval, Interval))
4483 continue;
4484
4485 // Move the interval right next to the top of the one it
4486 // intersects.
4487 assert(Interval.second > NewInterval.first &&
4488 "Invalid intervals configuration.");
4489 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4490 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4491 }
4492 return RetCycle;
4493}
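// Editorial illustration (standalone sketch, not part of the upstream file):
// the core idea of getFirstAvailableAt() on a sorted list of disjoint
// half-open [begin, end) bookings. A candidate usage window of Len cycles
// slides forward past every booking it collides with. The real function also
// threads AcquireAtCycle/ReleaseAtCycle through an IntervalBuilder callback.
#include <cstdint>
#include <list>
#include <utility>

using IvSketch = std::pair<int64_t, int64_t>; // [first, second)

inline int64_t firstFreeCycleSketch(const std::list<IvSketch> &Booked,
                                    int64_t Cycle, int64_t Len) {
  if (Len == 0) // A zero-length usage never conflicts.
    return Cycle;
  for (const IvSketch &B : Booked) {
    IvSketch Want{Cycle, Cycle + Len};
    if (Want.first < B.second && B.first < Want.second)
      Cycle += B.second - Want.first; // Retry just past this booking.
  }
  return Cycle;
}
// e.g. with bookings {[2,4), [6,9)}, firstFreeCycleSketch(Booked, 3, 2) == 4.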
4494
4495void ResourceSegments::add(ResourceSegments::IntervalTy A,
4496 const unsigned CutOff) {
4497 assert(A.first <= A.second && "Cannot add negative resource usage");
4498 assert(CutOff > 0 && "0-size interval history has no use.");
4499 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4500 // instruction needed the resource to be available but does not use it.
4501 // However, ResourceSegment represents an interval that is closed on the left
4502 // and open on the right. It is impossible to represent an empty interval when
4503 // the left is closed. Do not add it to Intervals.
4504 if (A.first == A.second)
4505 return;
4506
4507 assert(all_of(_Intervals,
4508 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4509 return !intersects(A, Interval);
4510 }) &&
4511 "A resource is being overwritten");
4512 _Intervals.push_back(A);
4513
4514 sortAndMerge();
4515
4516 // Do not keep the full history of the intervals, just the
4517 // latest #CutOff.
4518 while (_Intervals.size() > CutOff)
4519 _Intervals.pop_front();
4520}
4521
4522bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4523 ResourceSegments::IntervalTy B) {
4524 assert(A.first <= A.second && "Invalid interval");
4525 assert(B.first <= B.second && "Invalid interval");
4526
4527 // Share one boundary.
4528 if ((A.first == B.first) || (A.second == B.second))
4529 return true;
4530
4531 // full intersect: [ *** ) B
4532 // [***) A
4533 if ((A.first > B.first) && (A.second < B.second))
4534 return true;
4535
4536 // right intersect: [ ***) B
4537 // [*** ) A
4538 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4539 return true;
4540
4541 // left intersect: [*** ) B
4542 // [ ***) A
4543 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4544 return true;
4545
4546 return false;
4547}
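// Editorial worked example for the case analysis above (illustrative only):
//   intersects({2,5}, {2,9}) -> true   shared left boundary
//   intersects({3,4}, {2,6}) -> true   A lies fully inside B
//   intersects({4,7}, {2,5}) -> true   A starts inside B and extends past it
//   intersects({1,3}, {2,6}) -> true   A starts before B and overlaps it
//   intersects({2,5}, {5,7}) -> false  half-open intervals that only touch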
4548
4549void ResourceSegments::sortAndMerge() {
4550 if (_Intervals.size() <= 1)
4551 return;
4552
4553 // First sort the collection.
4554 _Intervals.sort(sortIntervals);
4555
4556 // We can use next because there are at least 2 elements in the list.
4557 auto next = std::next(std::begin(_Intervals));
4558 auto E = std::end(_Intervals);
4559 for (; next != E; ++next) {
4560 if (std::prev(next)->second >= next->first) {
4561 next->first = std::prev(next)->first;
4562 _Intervals.erase(std::prev(next));
4563 continue;
4564 }
4565 }
4566}
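// Editorial illustration (standalone sketch, not part of the upstream file):
// the same sort-then-coalesce idea as sortAndMerge() above, on a plain
// std::list of half-open [begin, end) pairs. Overlapping or touching
// neighbours are folded into a single interval.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>

using IvSketch2 = std::pair<int64_t, int64_t>; // [first, second)

inline void sortAndMergeSketch(std::list<IvSketch2> &Ivs) {
  if (Ivs.size() <= 1)
    return;
  // Disjoint intervals can be ordered by their left boundary alone.
  Ivs.sort([](const IvSketch2 &A, const IvSketch2 &B) { return A.first < B.first; });
  for (auto It = std::next(Ivs.begin()); It != Ivs.end(); ++It) {
    auto Prev = std::prev(It);
    if (Prev->second >= It->first) {
      // Fold the previous interval into the current one.
      It->first = Prev->first;
      It->second = std::max(It->second, Prev->second);
      Ivs.erase(Prev);
    }
  }
}
// e.g. {[6,9), [1,3), [2,5)} becomes {[1,5), [6,9)}.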
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:533
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
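A minimal sketch, assuming RPT has been init()'ed at the region's bottom boundary and RegionBegin marks its top, of walking the tracker upward and reading the result:

while (RPT.getPos() != RegionBegin) {
  RPT.recedeSkipDebugValues();   // do not charge DBG_VALUEs to any pressure set
  RPT.recede();                  // cross the previous real instruction
}
RPT.closeRegion();               // finalize boundaries, record live-ins/live-outs
const RegisterPressure &RP = RPT.getPressure();
// RP.MaxSetPressure[PSet] now holds the maximum pressure seen per pressure set.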
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
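A short sketch of the per-instruction collection step the pressure trackers build on; MI, TRI, MRI and LIS are assumed to be the usual per-function objects already in scope:

RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, MRI, /*TrackLaneMasks=*/false, /*IgnoreDead=*/false);
RegOpers.detectDeadDefs(MI, *LIS); // move unmarked dead defs into DeadDefs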
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These functions return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
bool isWeak() const
Tests if this is a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
bool isArtificialDep() const
Definition: ScheduleDAG.h:686
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:683
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:278
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:275
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:300
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:424
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:416
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296
unsigned NumPredsLeft
Definition: ScheduleDAG.h:274
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:301
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:276
bool isBottomReady() const
Definition: ScheduleDAG.h:467
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:291
bool isTopReady() const
Definition: ScheduleDAG.h:464
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:277
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
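A tiny example combining these fields: a tie-breaker that prefers the node deeper on the critical path, falling back to node order (illustrative only):

static bool deeperOnCriticalPath(const SUnit *A, const SUnit *B) {
  unsigned PathA = A->getDepth() + A->Latency; // cycles above A plus A itself
  unsigned PathB = B->getDepth() + B->Latency;
  return PathA != PathB ? PathA > PathB : A->NodeNum < B->NodeNum;
}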
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
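A hedged sketch of one top-down step against a boundary named Top; Top.Available is the boundary's ready queue, and pickFromHeuristics is hypothetical, standing in for the real candidate comparison:

SUnit *SU = Top.pickOnlyChoice();          // fast path: only one node is ready
if (!SU)
  SU = pickFromHeuristics(Top.Available);  // hypothetical heuristic selection
while (Top.checkHazard(SU))
  Top.bumpCycle(Top.getCurrCycle() + 1);   // stall until the hazard clears
Top.bumpNode(SU);                          // commit SU to the scheduled zone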
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
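A sketch, assuming a strategy holds a SchedDFSResult *DFSResult and can see the DAG's SUnits vector, of computing the metrics once all roots are known:

DFSResult->clear();
DFSResult->resize(SUnits.size());
DFSResult->compute(SUnits);
for (const SUnit &SU : SUnits)
  dbgs() << "SU(" << SU.NodeNum << ") subtree "
         << DFSResult->getSubtreeID(&SU) << " with "
         << DFSResult->getNumInstrs(&SU) << " instrs\n";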
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
Dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
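A hedged sketch of the usual composition: create the standard scheduler, attach a mutation, and register the factory (the "cluster-misched" name is made up for the example):

static ScheduleDAGInstrs *createClusteringSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}
static MachineSchedRegistry
    ClusteringSchedRegistry("cluster-misched", "Clustering machine scheduler",
                            createClusteringSched);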
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:580
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:267
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
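A brief sketch of the normalization queries SchedBoundary performs against the model; DAG, MI and SU are assumed to be in scope:

const TargetSchedModel *SM = DAG->getSchedModel();
if (SM->hasInstrSchedModel()) {
  // Scaled counts let micro-ops, cycles and resource units be compared directly.
  unsigned ScaledMOps    = SM->getNumMicroOps(MI) * SM->getMicroOpFactor();
  unsigned ScaledLatency = SU->Latency * SM->getLatencyFactor();
  dbgs() << "uops " << ScaledMOps << " latency " << ScaledLatency
         << " issue width " << SM->getIssueWidth() << '\n';
}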
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
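For example, a target could override the policy hook above roughly as follows; the class name and the 8-instruction threshold are illustrative only:

void MyTargetSubtargetInfo::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                                unsigned NumRegionInstrs) const {
  // Pressure tracking rarely pays off on tiny regions; schedule them top-down.
  if (NumRegionInstrs < 8)
    Policy.ShouldTrackPressure = false;
  Policy.OnlyTopDown = true;
}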
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:91
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
void initializePostMachineSchedulerPass(PassRegistry &)
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
cl::opt< bool > ViewMISchedDAGs
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
cl::opt< MISchedPostRASched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISchedPostRASched::TopDown), cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
cl::opt< bool > ForceBottomUp
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineSchedulerPass(PassRegistry &)
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
cl::opt< bool > ForceTopDown
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
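Hedged sketch of how these try* helpers chain inside a tryCandidate-style comparison: each returns true once it has decided the order and stamps the reason on the winning candidate (Cand is the incumbent, TryCand the challenger, Zone the active boundary):

if (tryLess(getWeakLeft(TryCand.SU, /*isTop=*/true),
            getWeakLeft(Cand.SU, /*isTop=*/true),
            TryCand, Cand, GenericSchedulerBase::Weak))
  return TryCand.Reason != GenericSchedulerBase::NoCand;
if (tryLatency(TryCand, Cand, Zone))
  return TryCand.Reason != GenericSchedulerBase::NoCand;
TryCand.Reason = GenericSchedulerBase::NodeOrder; // fall back to source order
return false;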
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.