#define DEBUG_TYPE "machine-scheduler"

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  // ...
  CurInstKind = IDOther;
  OccupedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  // ...
}
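
// The strategy keeps a Pending and an Available queue per instruction kind:
// released nodes land in Pending and are moved to Available once the clause
// logic allows them to be picked.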
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}
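
// A wavefront that needs GPRCount registers leaves room for at most
// 248 / GPRCount resident wavefronts (treating 248 as the usable GPR budget);
// more resident wavefronts means more fetch latency can be hidden.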
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}

SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check whether we want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
                          (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
                            (!Available[IDFetch].empty() ||
                             !Available[IDOther].empty());
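
  // Heuristic: roughly 500 cycles of TEX latency have to be hidden by ALU
  // work at about 8 cycles per ALU instruction, so the number of wavefronts
  // needed is approximately 500 / (8 * AluFetchRatio) = 62.5 / AluFetchRatio.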
  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
              (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a fetch instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  if (!SU) {
    // Fall back to any other instruction.
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
    if (SU) {
      dbgs() << " ** Pick node **\n";
      SU->dump(DAG);
    } else {
      dbgs() << "NO NODE \n";
      for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
        const SUnit &S = DAG->SUnits[i];
        if (!S.isScheduled)
          S.dump(DAG);
      }
    }
  );

  return SU;
}

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    AluInstCount++;
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      // Inline literals occupy extra slots in the clause.
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
                                      E = SU->getInstr()->operands_end();
           It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // IDOther nodes do not belong to a clause and can be scheduled as soon as
  // they are ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return RC->contains(Reg);
  return MRI->getRegClass(Reg) == RC;
}

R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();
  // ...
  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  // ...
  }

  // Does the instruction take a whole instruction group?
  if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == AMDGPU::GROUP_BARRIER)
    return AluT_XYZW;

  // Is the result already assigned to a channel (sub0..sub3 map to X..W)?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  // ...
  }

  // Is the result already constrained to an X/Y/Z/W register class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode))
    return IDAlu;

  switch (Opcode) {
  // ...
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return IDAlu;
  default:
    return IDOther;
  }
}
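
// PopInst scans a queue from the back (bottom-up scheduling) and returns the
// first unit that still fits the current instruction group: it must respect
// the constant-read limitations and, when AnyALU is set (used for the trans
// slot), it must not be a vector-only instruction.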
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(SU->getInstr()->getOpcode()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    }
    InstructionsGroupCandidate.pop_back();
  }
  return nullptr;
}

void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

void R600SchedStrategy::AssignSlot(MachineInstr *MI, unsigned Slot) {
  int DstIndex = TII->getOperandIdx(*MI, AMDGPU::OpName::dst);
  if (DstIndex == -1)
    return;
  // ... constrain the destination register class so the result is allocated
  // to channel Slot.
}
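
// AttemptFillSlot first tries a unit already constrained to the requested
// channel; failing that, it takes an unconstrained (AluAny) unit and pins its
// destination register class to that channel via AssignSlot.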
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}
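
// pickAlu builds one VLIW instruction group at a time. Bits 0-3 of
// OccupedSlotsMask track the X/Y/Z/W vector slots and bit 4 the trans slot,
// which only exists on VLIW5 hardware (pre-Cayman).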
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      // Flush physical-register copies (register allocation will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // If there is an instruction that fills all four vector slots, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccuped = OccupedSlotsMask & 16;
    if (!TransSlotOccuped && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}

SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = nullptr;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty())
    MoveUnits(Pending[QID], AQ);
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;
}