19#define DEBUG_TYPE "machine-scheduler"
30 void schedule()
override {}
59 "coexec scheduler only supports top-down scheduling");
75 "coexec scheduler only supports top-down scheduling");
77 if (
DAG->top() ==
DAG->bottom()) {
79 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
83 bool PickedPending =
false;
86 PickedPending =
false;
92 PickedPending,
false);
101 unsigned CurrentCycle =
Top.getCurrCycle();
102 if (ReadyCycle > CurrentCycle)
103 Top.bumpCycle(ReadyCycle);
106 while (
Top.checkHazard(SU))
107 Top.bumpCycle(
Top.getCurrCycle() + 1);
109 Top.releasePending();
120 assert(IsTopNode &&
"coexec scheduler must only schedule from top boundary");
127 bool &PickedPending,
bool IsBottomUp) {
128 assert(Zone.
isTop() &&
"coexec scheduler only supports top boundary");
129 assert(!IsBottomUp &&
"coexec scheduler only supports top-down scheduling");
133 unsigned SGPRPressure = 0;
134 unsigned VGPRPressure = 0;
135 PickedPending =
false;
136 if (
DAG->isTrackingPressure()) {
138 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
139 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
146 auto EvaluateQueue = [&](
ReadyQueue &Q,
bool FromPending) {
147 for (
SUnit *SU : Q) {
150 VGPRPressure, IsBottomUp);
157 PickedPending = FromPending;
169 EvaluateQueue(Zone.
Pending,
true);
187 if (
DAG->isTrackingPressure() &&
197 bool SameBoundary = Zone !=
nullptr;
220 bool CandIsClusterSucc =
222 bool TryCandIsClusterSucc =
225 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
237 if (
DAG->isTrackingPressure() &&
256 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
277 unsigned Structural = 0;
279 unsigned Effective = 0;
283 auto GetStallCosts = [&](
SUnit *SU) {
284 unsigned ReadyCycle = Zone.
isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
286 Costs.Ready = ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
289 Costs.Effective = std::max({Costs.Ready, Costs.Structural, Costs.Latency});
293 StallCosts TryCosts = GetStallCosts(TryCand.
SU);
294 StallCosts CandCosts = GetStallCosts(Cand.
SU);
296 LLVM_DEBUG(
if (TryCosts.Effective || CandCosts.Effective) {
297 dbgs() <<
"Effective stalls: try=" << TryCosts.Effective
298 <<
" (ready=" << TryCosts.Ready <<
", struct=" << TryCosts.Structural
299 <<
", lat=" << TryCosts.Latency <<
") cand=" << CandCosts.Effective
300 <<
" (ready=" << CandCosts.Ready
301 <<
", struct=" << CandCosts.Structural
302 <<
", lat=" << CandCosts.Latency <<
")\n";
305 return tryLess(TryCosts.Effective, CandCosts.Effective, TryCand, Cand,
Stall);
310 LLVM_DEBUG(
dbgs() <<
"AMDGPU coexec preRA scheduler selected for "
311 <<
C->MF->getName() <<
'\n');
313 C, std::make_unique<AMDGPUCoExecSchedStrategy>(
C));
319 <<
C->MF->getName() <<
'\n');
320 return new GCNNoopPostScheduleDAG(
C);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
Coexecution-focused scheduling strategy for AMDGPU.
bool tryEffectiveStall(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone) const
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &PickedPending, bool IsBottomUp)
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
AMDGPUCoExecSchedStrategy(const MachineSchedContext *C)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const TargetRegisterInfo * TRI
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineInstrBundleIterator< MachineInstr > iterator
virtual void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs)
Optionally override the per-region scheduling policy.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region if the pressure shrinks within it.
Scheduling unit. This is a node in the scheduling DAG.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
bool isBottomReady() const
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
A ScheduleDAG for scheduling lists of MachineInstr.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the strategy specified by the SchedStrategy.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ScheduleDAGInstrs * createGCNNoopPostMachineScheduler(MachineSchedContext *C)
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
ScheduleDAGInstrs * createGCNCoExecMachineScheduler(MachineSchedContext *C)
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI cl::opt< MISched::Direction > PreRADirection
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void setBest(SchedCandidate &Best)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
PressureChange CurrentMax