Go to the documentation of this file.
35 #include "llvm/Config/llvm-config.h"
44 #define DEBUG_TYPE "post-RA-sched"
46 STATISTIC(NumNoops,
"Number of noops inserted");
47 STATISTIC(NumStalls,
"Number of pipeline stalls");
48 STATISTIC(NumFixedAnti,
"Number of fixed anti-dependencies");
55 cl::desc(
"Enable scheduling after register allocation"),
59 cl::desc(
"Break post-RA scheduling anti-dependencies: "
60 "\"critical\", \"all\", or \"none\""),
66 cl::desc(
"Debug control MBBs that are scheduled"),
70 cl::desc(
"Debug control MBBs that are scheduled"),
103 bool enablePostRAScheduler(
119 std::vector<SUnit*> PendingQueue;
134 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
140 unsigned EndIndex = 0;
143 SchedulePostRATDList(
149 ~SchedulePostRATDList()
override;
157 void setEndIndex(
unsigned EndIdx) { EndIndex = EndIdx; }
163 unsigned regioninstrs)
override;
166 void exitRegion()
override;
170 void schedule()
override;
181 void finishBlock()
override;
185 void postprocessDAG();
187 void ReleaseSucc(
SUnit *SU,
SDep *SuccEdge);
188 void ReleaseSuccessors(
SUnit *SU);
189 void ScheduleNodeTopDown(
SUnit *SU,
unsigned CurCycle);
190 void ListScheduleTopDown();
192 void dumpSchedule()
const;
193 void emitNoop(
unsigned CurCycle);
200 "Post RA top-down list latency scheduler",
false,
false)
202 SchedulePostRATDList::SchedulePostRATDList(
210 MF.getSubtarget().getInstrItineraryData();
212 MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
214 MF.getSubtarget().getPostRAMutations(Mutations);
216 assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
218 "Live-ins must be accurate for anti-dependency breaking");
219 AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)
221 : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)
226 SchedulePostRATDList::~SchedulePostRATDList() {
235 unsigned regioninstrs) {
241 void SchedulePostRATDList::exitRegion() {
243 dbgs() <<
"*** Final schedule ***\n";
250 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
257 dbgs() <<
"**** NOOP ****\n";
262 bool PostRAScheduler::enablePostRAScheduler(
267 Mode =
ST.getAntiDepBreakMode();
268 ST.getCriticalPathRCs(CriticalPathRCs);
274 return ST.enablePostRAScheduler() &&
275 OptLevel >=
ST.getOptLevelToEnablePostRAScheduler();
284 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
290 TargetSubtargetInfo::ANTIDEP_NONE;
296 AntiDepMode, CriticalPathRCs))
302 ? TargetSubtargetInfo::ANTIDEP_ALL
304 ? TargetSubtargetInfo::ANTIDEP_CRITICAL
305 : TargetSubtargetInfo::ANTIDEP_NONE);
310 SchedulePostRATDList
Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
314 for (
auto &
MBB : Fn) {
318 static int bbcnt = 0;
321 dbgs() <<
"*** DEBUG scheduling " << Fn.getName() <<
":"
332 unsigned Count =
MBB.
size(), CurrentCount = Count;
340 Scheduler.enterRegion(&
MBB,
I, Current, CurrentCount - Count);
346 CurrentCount = Count;
351 Count -=
MI.getBundleSize();
353 assert(Count == 0 &&
"Instruction count mismatch!");
355 "Instruction count mismatch!");
382 AntiDepBreak->StartBlock(
BB);
387 void SchedulePostRATDList::schedule() {
393 AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
394 EndIndex, DbgValues);
406 NumFixedAnti += Broken;
415 AvailableQueue.initNodes(SUnits);
416 ListScheduleTopDown();
417 AvailableQueue.releaseState();
423 void SchedulePostRATDList::Observe(
MachineInstr &
MI,
unsigned Count) {
425 AntiDepBreak->Observe(
MI, Count, EndIndex);
430 void SchedulePostRATDList::finishBlock() {
432 AntiDepBreak->FinishBlock();
439 void SchedulePostRATDList::postprocessDAG() {
440 for (
auto &M : Mutations)
450 void SchedulePostRATDList::ReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
459 dbgs() <<
"*** Scheduling failed! ***\n";
461 dbgs() <<
" has been released too many times!\n";
481 PendingQueue.push_back(SuccSU);
485 void SchedulePostRATDList::ReleaseSuccessors(
SUnit *SU) {
488 ReleaseSucc(SU, &*
I);
495 void SchedulePostRATDList::ScheduleNodeTopDown(
SUnit *SU,
unsigned CurCycle) {
501 "Node scheduled above its depth!");
504 ReleaseSuccessors(SU);
506 AvailableQueue.scheduledNode(SU);
510 void SchedulePostRATDList::emitNoop(
unsigned CurCycle) {
511 LLVM_DEBUG(
dbgs() <<
"*** Emitting noop in cycle " << CurCycle <<
'\n');
512 HazardRec->EmitNoop();
519 void SchedulePostRATDList::ListScheduleTopDown() {
520 unsigned CurCycle = 0;
529 ReleaseSuccessors(&EntrySU);
535 AvailableQueue.push(&
SUnit);
542 bool CycleHasInsts =
false;
546 std::vector<SUnit*> NotReady;
548 while (!AvailableQueue.empty() || !PendingQueue.empty()) {
551 unsigned MinDepth = ~0u;
552 for (
unsigned i = 0,
e = PendingQueue.size();
i !=
e; ++
i) {
553 if (PendingQueue[
i]->getDepth() <= CurCycle) {
554 AvailableQueue.push(PendingQueue[
i]);
555 PendingQueue[
i]->isAvailable =
true;
556 PendingQueue[
i] = PendingQueue.back();
557 PendingQueue.pop_back();
559 }
else if (PendingQueue[
i]->getDepth() < MinDepth)
560 MinDepth = PendingQueue[
i]->getDepth();
564 AvailableQueue.dump(
this));
566 SUnit *FoundSUnit =
nullptr, *NotPreferredSUnit =
nullptr;
567 bool HasNoopHazards =
false;
568 while (!AvailableQueue.empty()) {
569 SUnit *CurSUnit = AvailableQueue.pop();
572 HazardRec->getHazardType(CurSUnit, 0);
574 if (HazardRec->ShouldPreferAnother(CurSUnit)) {
575 if (!NotPreferredSUnit) {
580 NotPreferredSUnit = CurSUnit;
584 FoundSUnit = CurSUnit;
592 NotReady.push_back(CurSUnit);
598 if (NotPreferredSUnit) {
601 dbgs() <<
"*** Will schedule a non-preferred instruction...\n");
602 FoundSUnit = NotPreferredSUnit;
604 AvailableQueue.push(NotPreferredSUnit);
607 NotPreferredSUnit =
nullptr;
611 if (!NotReady.empty()) {
612 AvailableQueue.push_all(NotReady);
619 unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
620 for (
unsigned i = 0;
i != NumPreNoops; ++
i)
624 ScheduleNodeTopDown(FoundSUnit, CurCycle);
625 HazardRec->EmitInstruction(FoundSUnit);
626 CycleHasInsts =
true;
627 if (HazardRec->atIssueLimit()) {
628 LLVM_DEBUG(
dbgs() <<
"*** Max instructions per cycle " << CurCycle
630 HazardRec->AdvanceCycle();
632 CycleHasInsts =
false;
637 HazardRec->AdvanceCycle();
638 }
else if (!HasNoopHazards) {
642 HazardRec->AdvanceCycle();
652 CycleHasInsts =
false;
657 unsigned ScheduledNodes = VerifyScheduledDAG(
false);
660 "The number of nodes scheduled doesn't match the expected number!");
665 void SchedulePostRATDList::EmitSchedule() {
666 RegionBegin = RegionEnd;
670 BB->splice(RegionEnd,
BB, FirstDbgValue);
683 RegionBegin = std::prev(RegionEnd);
687 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
688 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
689 std::pair<MachineInstr *, MachineInstr *>
P = *std::prev(DI);
695 FirstDbgValue =
nullptr;
unsigned getPosition() const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode
bool isAvailable
True once available.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
virtual const TargetInstrInfo * getInstrInfo() const
virtual ~AntiDepBreaker()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
Insert a noop into the instruction stream at the specified point.
const_iterator end(StringRef path)
Get end iterator over path.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
SmallVector< SDep, 4 > Succs
All sunit successors.
Properties which a MachineFunction may have at a given point in time.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
AntiDepBreaker * createCriticalAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI)
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
< i1 > br i1 label label bb bb
TargetInstrInfo - Interface to description of machine instruction set.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setDepthToAtLeast(unsigned NewDepth)
If NewDepth is greater than this node's depth value, sets it to be the new depth value.
Represent the analysis usage information of a pass.
const HexagonInstrInfo * TII
MachineFunctionProperties & set(Property P)
static cl::opt< bool > EnablePostRAScheduler("post-RA-scheduler", cl::desc("Enable scheduling after register allocation"), cl::init(false), cl::Hidden)
STATISTIC(NumFunctions, "Total number of functions")
static cl::opt< int > DebugMod("postra-sched-debugmod", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
static cl::opt< std::string > EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden)
Target-Independent Code Generator Pass Configuration Options.
static cl::opt< int > DebugDiv("postra-sched-debugdiv", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
SmallVectorImpl< SDep >::iterator succ_iterator
void runOnMachineFunction(const MachineFunction &MF)
runOnMachineFunction - Prepare to answer questions about MF.
Class recording the (high level) value of a variable.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Representation of each machine instruction.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
bool isScheduled
True once scheduled.
initializer< Ty > init(const Ty &Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Machine Instruction Scheduler
void setPreservesCFG()
This function should be called by the pass iff it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
TargetSubtargetInfo - Generic base class for all target subtargets.
unsigned const MachineRegisterInfo * MRI
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
Function & getFunction()
Return the LLVM function that this machine code represents.
bool isWeak() const
Tests if this is a weak dependence.
This class works in conjunction with the post-RA scheduler to rename registers to break register anti...
INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE, "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
virtual void finishBlock()
Cleans up after scheduling in the given block.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Scheduling unit. This is a node in the scheduling DAG.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Level
Code generation optimization level.
AnalysisUsage & addRequired()
A ScheduleDAG for scheduling lists of MachineInstr.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
void clearDAG()
Clears the DAG state (between regions).
Itinerary data supplied by a subtarget to be used by a target.
CodeGenOpt::Level getOptLevel() const
AntiDepBreaker * createAggressiveAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector &CriticalPathRCs)