35#include "llvm/Config/llvm-config.h"
44#define DEBUG_TYPE "post-RA-sched"
48STATISTIC(NumFixedAnti,
"Number of fixed anti-dependencies");
55 cl::desc(
"Enable scheduling after register allocation"),
59 cl::desc(
"Break post-RA scheduling anti-dependencies: "
60 "\"critical\", \"all\", or \"none\""),
66 cl::desc(
"Debug control MBBs that are scheduled"),
70 cl::desc(
"Debug control MBBs that are scheduled"),
97 MachineFunctionProperties::Property::NoVRegs);
103 bool enablePostRAScheduler(
108 char PostRAScheduler::ID = 0;
119 std::vector<SUnit*> PendingQueue;
134 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
140 unsigned EndIndex = 0;
143 SchedulePostRATDList(
149 ~SchedulePostRATDList()
override;
157 void setEndIndex(
unsigned EndIdx) { EndIndex = EndIdx; }
163 unsigned regioninstrs)
override;
185 void postProcessDAG();
187 void ReleaseSucc(
SUnit *SU,
SDep *SuccEdge);
188 void ReleaseSuccessors(
SUnit *SU);
189 void ScheduleNodeTopDown(
SUnit *SU,
unsigned CurCycle);
190 void ListScheduleTopDown();
192 void dumpSchedule()
const;
193 void emitNoop(
unsigned CurCycle);
200 "Post RA top-down list latency scheduler",
false,
false)
202SchedulePostRATDList::SchedulePostRATDList(
210 MF.getSubtarget().getInstrItineraryData();
212 MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
214 MF.getSubtarget().getPostRAMutations(Mutations);
216 assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
217 MRI.tracksLiveness()) &&
218 "Live-ins must be accurate for anti-dependency breaking");
219 AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)
221 : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)
226SchedulePostRATDList::~SchedulePostRATDList() {
235 unsigned regioninstrs) {
241void SchedulePostRATDList::exitRegion() {
243 dbgs() <<
"*** Final schedule ***\n";
250#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
253 for (
const SUnit *SU : Sequence) {
257 dbgs() <<
"**** NOOP ****\n";
262bool PostRAScheduler::enablePostRAScheduler(
266 Mode =
ST.getAntiDepBreakMode();
267 ST.getCriticalPathRCs(CriticalPathRCs);
273 return ST.enablePostRAScheduler() &&
274 OptLevel >=
ST.getOptLevelToEnablePostRAScheduler();
282 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
283 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
289 TargetSubtargetInfo::ANTIDEP_NONE;
295 AntiDepMode, CriticalPathRCs))
301 ? TargetSubtargetInfo::ANTIDEP_ALL
303 ? TargetSubtargetInfo::ANTIDEP_CRITICAL
304 : TargetSubtargetInfo::ANTIDEP_NONE);
309 SchedulePostRATDList
Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
313 for (
auto &
MBB : Fn) {
317 static int bbcnt = 0;
320 dbgs() <<
"*** DEBUG scheduling " << Fn.getName() <<
":"
331 unsigned Count =
MBB.
size(), CurrentCount = Count;
339 Scheduler.enterRegion(&
MBB,
I, Current, CurrentCount - Count);
345 CurrentCount = Count;
350 Count -=
MI.getBundleSize();
352 assert(Count == 0 &&
"Instruction count mismatch!");
354 "Instruction count mismatch!");
381 AntiDepBreak->StartBlock(BB);
386void SchedulePostRATDList::schedule() {
392 AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
393 EndIndex, DbgValues);
405 NumFixedAnti += Broken;
414 AvailableQueue.initNodes(SUnits);
415 ListScheduleTopDown();
416 AvailableQueue.releaseState();
422void SchedulePostRATDList::Observe(
MachineInstr &
MI,
unsigned Count) {
424 AntiDepBreak->Observe(
MI, Count, EndIndex);
429void SchedulePostRATDList::finishBlock() {
431 AntiDepBreak->FinishBlock();
438void SchedulePostRATDList::postProcessDAG() {
439 for (
auto &M : Mutations)
449void SchedulePostRATDList::ReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
458 dbgs() <<
"*** Scheduling failed! ***\n";
460 dbgs() <<
" has been released too many times!\n";
480 PendingQueue.push_back(SuccSU);
484void SchedulePostRATDList::ReleaseSuccessors(
SUnit *SU) {
487 ReleaseSucc(SU, &*
I);
494void SchedulePostRATDList::ScheduleNodeTopDown(
SUnit *SU,
unsigned CurCycle) {
500 "Node scheduled above its depth!");
503 ReleaseSuccessors(SU);
505 AvailableQueue.scheduledNode(SU);
509void SchedulePostRATDList::emitNoop(
unsigned CurCycle) {
510 LLVM_DEBUG(
dbgs() <<
"*** Emitting noop in cycle " << CurCycle <<
'\n');
511 HazardRec->EmitNoop();
518void SchedulePostRATDList::ListScheduleTopDown() {
519 unsigned CurCycle = 0;
528 ReleaseSuccessors(&EntrySU);
534 AvailableQueue.push(&
SUnit);
541 bool CycleHasInsts =
false;
545 std::vector<SUnit*> NotReady;
547 while (!AvailableQueue.empty() || !PendingQueue.empty()) {
550 unsigned MinDepth = ~0
u;
551 for (
unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
552 if (PendingQueue[i]->getDepth() <= CurCycle) {
553 AvailableQueue.push(PendingQueue[i]);
554 PendingQueue[i]->isAvailable =
true;
555 PendingQueue[i] = PendingQueue.back();
556 PendingQueue.pop_back();
558 }
else if (PendingQueue[i]->getDepth() < MinDepth)
559 MinDepth = PendingQueue[i]->getDepth();
563 AvailableQueue.dump(
this));
565 SUnit *FoundSUnit =
nullptr, *NotPreferredSUnit =
nullptr;
566 bool HasNoopHazards =
false;
567 while (!AvailableQueue.empty()) {
568 SUnit *CurSUnit = AvailableQueue.pop();
571 HazardRec->getHazardType(CurSUnit, 0);
573 if (HazardRec->ShouldPreferAnother(CurSUnit)) {
574 if (!NotPreferredSUnit) {
579 NotPreferredSUnit = CurSUnit;
583 FoundSUnit = CurSUnit;
591 NotReady.push_back(CurSUnit);
597 if (NotPreferredSUnit) {
600 dbgs() <<
"*** Will schedule a non-preferred instruction...\n");
601 FoundSUnit = NotPreferredSUnit;
603 AvailableQueue.push(NotPreferredSUnit);
606 NotPreferredSUnit =
nullptr;
610 if (!NotReady.empty()) {
611 AvailableQueue.push_all(NotReady);
618 unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
619 for (
unsigned i = 0; i != NumPreNoops; ++i)
623 ScheduleNodeTopDown(FoundSUnit, CurCycle);
624 HazardRec->EmitInstruction(FoundSUnit);
625 CycleHasInsts =
true;
626 if (HazardRec->atIssueLimit()) {
627 LLVM_DEBUG(
dbgs() <<
"*** Max instructions per cycle " << CurCycle
629 HazardRec->AdvanceCycle();
631 CycleHasInsts =
false;
636 HazardRec->AdvanceCycle();
637 }
else if (!HasNoopHazards) {
641 HazardRec->AdvanceCycle();
651 CycleHasInsts =
false;
656 unsigned ScheduledNodes = VerifyScheduledDAG(
false);
659 "The number of nodes scheduled doesn't match the expected number!");
664void SchedulePostRATDList::EmitSchedule() {
665 RegionBegin = RegionEnd;
669 BB->
splice(RegionEnd, BB, FirstDbgValue);
672 for (
unsigned i = 0, e =
Sequence.size(); i != e; i++) {
673 if (
SUnit *SU = Sequence[i])
682 RegionBegin = std::prev(RegionEnd);
686 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
687 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
688 std::pair<MachineInstr *, MachineInstr *>
P = *std::prev(DI);
694 FirstDbgValue =
nullptr;
unsigned const MachineRegisterInfo * MRI
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
const HexagonInstrInfo * TII
Machine Instruction Scheduler
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< int > DebugDiv("postra-sched-debugdiv", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)
static cl::opt< bool > EnablePostRAScheduler("post-RA-scheduler", cl::desc("Enable scheduling after register allocation"), cl::init(false), cl::Hidden)
static cl::opt< std::string > EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden)
static cl::opt< int > DebugMod("postra-sched-debugmod", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class works in conjunction with the post-RA scheduler to rename registers to break register anti...
virtual ~AntiDepBreaker()
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
Insert a noop into the instruction stream at the specified point.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
bool isWeak() const
Tests if this a weak dependence.
Scheduling unit. This is a node in the scheduling DAG.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
bool isAvailable
True once available.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVectorImpl< SDep >::iterator succ_iterator
void setDepthToAtLeast(unsigned NewDepth)
If NewDepth is greater than this node's depth value, sets it to be the new depth value.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
virtual void schedule()=0
Orders nodes according to selected style.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void clearDAG()
Clears the DAG state (between regions).
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
Target-Independent Code Generator Pass Configuration Options.
CodeGenOptLevel getOptLevel() const
TargetSubtargetInfo - Generic base class for all target subtargets.
enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
AntiDepBreaker * createAggressiveAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
AntiDepBreaker * createCriticalAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.