35#define DEBUG_TYPE "machine-scheduler"
41 cl::desc(
"The OOO window for processor "
42 "resources during scheduling."),
45unsigned SystemZHazardRecognizer::
46getNumDecoderSlots(
SUnit *SU)
const {
51 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52 "Only cracked instruction can have 2 uops.");
53 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54 "Expanded instructions always group alone.");
55 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56 "Expanded instructions fill the group(s).");
58 return SC->NumMicroOps;
61unsigned SystemZHazardRecognizer::getCurrCycleIdx(
SUnit *SU)
const {
62 unsigned Idx = CurrGroupSize;
66 if (SU !=
nullptr && !fitsIntoCurrentGroup(SU)) {
69 else if (
Idx == 4 ||
Idx == 5)
83 CurrGroupHas4RegOps =
false;
84 clearProcResCounters();
86 LastFPdOpCycleIdx = UINT_MAX;
87 LastEmittedMI =
nullptr;
92SystemZHazardRecognizer::fitsIntoCurrentGroup(
SUnit *SU)
const {
100 return (CurrGroupSize == 0);
103 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104 "Current decoder group is already full!");
105 if (CurrGroupSize == 2 && has4RegOps(SU->
getInstr()))
111 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112 "Expected normal instruction to fit in non-full group!");
117bool SystemZHazardRecognizer::has4RegOps(
const MachineInstr *
MI)
const {
134void SystemZHazardRecognizer::nextGroup() {
135 if (CurrGroupSize == 0)
141 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
142 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
143 "Current decoder group bad.");
147 CurrGroupHas4RegOps =
false;
153 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
154 ? (ProcResourceCounters[i] - NumGroups)
158 if (CriticalResourceIdx != UINT_MAX &&
159 (ProcResourceCounters[CriticalResourceIdx] <=
161 CriticalResourceIdx = UINT_MAX;
180 std::string FU(PRD.
Name);
182 FU = FU.substr(FU.find(
'_') + 1);
183 size_t Pos = FU.find(
"Unit");
184 if (Pos != std::string::npos)
190 if (PI->ReleaseAtCycle> 1)
191 OS <<
"(" << PI->ReleaseAtCycle <<
"cyc)";
194 if (SC->NumMicroOps > 1)
195 OS <<
"/" << SC->NumMicroOps <<
"uops";
196 if (SC->BeginGroup && SC->EndGroup)
197 OS <<
"/GroupsAlone";
198 else if (SC->BeginGroup)
199 OS <<
"/BeginsGroup";
200 else if (SC->EndGroup)
209 dbgs() <<
"++ " << Msg;
213 dbgs() <<
" <empty>\n";
216 dbgs() <<
" (" << CurrGroupSize <<
" decoder slot"
217 << (CurrGroupSize > 1 ?
"s":
"")
218 << (CurrGroupHas4RegOps ?
", 4RegOps" :
"")
227 if (ProcResourceCounters[i] > 0) {
235 dbgs() <<
"++ | Resource counters: ";
237 if (ProcResourceCounters[i] > 0)
239 <<
":" << ProcResourceCounters[i] <<
" ";
242 if (CriticalResourceIdx != UINT_MAX)
243 dbgs() <<
"++ | Critical resource: "
250 dbgs() <<
"++ | Current cycle index: "
251 << getCurrCycleIdx() <<
"\n";
253 if (LastFPdOpCycleIdx != UINT_MAX)
254 dbgs() <<
"++ | Last FPd cycle index: " << LastFPdOpCycleIdx <<
"\n";
259void SystemZHazardRecognizer::clearProcResCounters() {
261 CriticalResourceIdx = UINT_MAX;
265 return (
MI->isBranch() ||
MI->isReturn() ||
266 MI->getOpcode() == SystemZ::CondTrap);
279 if (!fitsIntoCurrentGroup(SU))
303 ProcResourceCounters[PI->ProcResourceIdx];
304 CurrCounter += PI->ReleaseAtCycle;
307 (CriticalResourceIdx == UINT_MAX ||
308 (PI->ProcResourceIdx != CriticalResourceIdx &&
310 ProcResourceCounters[CriticalResourceIdx]))) {
312 dbgs() <<
"++ New critical resource: "
315 CriticalResourceIdx = PI->ProcResourceIdx;
321 LastFPdOpCycleIdx = getCurrCycleIdx(SU);
322 LLVM_DEBUG(
dbgs() <<
"++ Last FPd cycle index: " << LastFPdOpCycleIdx
328 CurrGroupSize += getNumDecoderSlots(SU);
329 CurrGroupHas4RegOps |= has4RegOps(SU->
getInstr());
330 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
331 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
332 &&
"SU does not fit into decoder group!");
336 if (CurrGroupSize >= GroupLim || SC->EndGroup)
347 if (SC->BeginGroup) {
349 return 3 - CurrGroupSize;
356 unsigned resultingGroupSize =
357 (CurrGroupSize + getNumDecoderSlots(SU));
358 if (resultingGroupSize < 3)
359 return (3 - resultingGroupSize);
364 if (CurrGroupSize == 2 && has4RegOps(SU->
getInstr()))
371bool SystemZHazardRecognizer::isFPdOpPreferred_distance(
SUnit *SU)
const {
374 if (LastFPdOpCycleIdx == UINT_MAX)
380 unsigned SUCycleIdx = getCurrCycleIdx(SU);
381 if (LastFPdOpCycleIdx > SUCycleIdx)
382 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
383 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
397 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
399 else if (CriticalResourceIdx != UINT_MAX) {
403 if (PI->ProcResourceIdx == CriticalResourceIdx)
404 Cost = PI->ReleaseAtCycle;
434 unsigned GroupSizeBeforeEmit = CurrGroupSize;
439 if (GroupSizeBeforeEmit == 1)
443 if (TakenBranch && CurrGroupSize > 0)
447 "Scheduler: unhandled terminator!");
453 CurrGroupSize =
Incoming->CurrGroupSize;
457 ProcResourceCounters =
Incoming->ProcResourceCounters;
458 CriticalResourceIdx =
Incoming->CriticalResourceIdx;
461 LastFPdOpCycleIdx =
Incoming->LastFPdOpCycleIdx;
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static bool isBranchRetTrap(MachineInstr *MI)
static cl::opt< int > ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8))
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
bool hasReservedResource
Uses a reserved resource.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void assign(size_type NumElts, ValueParamT Elt)
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpProcResourceCounters() const
void emitInstruction(MachineInstr *MI, bool TakenBranch=false)
Wrap a non-scheduled instruction in an SU and emit it.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
void copyState(SystemZHazardRecognizer *Incoming)
Copy counters from end of single predecessor.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
void dumpSU(SUnit *SU, raw_ostream &OS) const
HazardType getHazardType(SUnit *SU, int Stalls=0) override
getHazardType - Return the hazard type of emitting this node.
void dumpCurrGroup(std::string Msg="") const
int resourcesCost(SUnit *SU)
Return the cost of SU with regard to processor resource usage.
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
const SystemZRegisterInfo & getRegisterInfo() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...