#define DEBUG_TYPE "misched"
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
  // ...
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  // ...
}
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
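// Worked example (hypothetical numbers): a wavefront needing 8 GPRs leaves
// room for 248 / 8 = 31 concurrent wavefronts, while one needing 124 GPRs
// allows only 2. The constant 248 presumably reflects the per-SIMD register
// budget of the R600 family; only the quotient is used by the heuristic below.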
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check whether we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
                          (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
                            (!Available[IDFetch].empty() ||
                             !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // Heuristic: the approximate number of wavefronts needed for TEX latency
    // to hide ALU work is 500 (TEX cycles) / (AluFetchRatio * 8 (ALU cycles)).
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }
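  // Note on the arithmetic above: every term in the ratio is an integer, so
  // the quotient truncates before being stored in the float, and a ratio
  // below one reads as exactly 0, forcing the clause switch. With, say, 20
  // ALU instructions counted against 4 fetches, the estimate is 24 / 4 = 6
  // and NeededWF = 62.5 / 6 = 10; since 2 * 4 = 8 near-future GPRs still
  // permit getWFCountLimitedByGPR(8) = 31 wavefronts, the heuristic does not
  // force a switch. (Illustrative numbers, not from the source.)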
  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
              (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a fetch instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  if (!SU) {
    // Fall back to any other instruction.
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }
  DEBUG(
      if (SU) {
        dbgs() << " ** Pick node **\n";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE \n";
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          // ... (dump the still-unscheduled SUnits)
        }
      }
  );

  return SU;
}
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }
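  // OccupedSlotsMask layout (inferred from its uses below): bits 0-3 stand
  // for the X, Y, Z and W vector slots of the current instruction group and
  // bit 4 (value 16) for the trans slot, so 15 marks the four vector slots
  // and 31 marks all five. Setting every bit on a switch away from ALU forces
  // pickAlu to start a fresh group via PrepareNextSlot when the next ALU
  // clause begins.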
  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    // ...
    default: {
      ++CurEmitted;
      // Each inline literal occupies its own slot in the clause.
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  } else
    FetchInstCount++;
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  // ...
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // There is no export clause; an IDOther instruction can be scheduled as
  // soon as it is ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}
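// Design note: holding physical-register COPYs in their own queue keeps them
// from competing for ALU slots; pickNode drains PhysicalRegCopy only when
// pickAlu has nothing to offer, so these copies never break up an instruction
// group that could otherwise be filled.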
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  // Physical registers are tested with RC->contains(Reg); virtual registers
  // are compared against MRI->getRegClass(Reg).
  // ...
}
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();
  // ...
  switch (MI->getOpcode()) {
  // ...
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  // ...
  }

  // Does the instruction take a whole instruction group?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
    return AluT_XYZW;
  }
  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  // ... (sub0..sub3 map to AluT_X..AluT_W)
  }

  // Is the result already a member of an X/Y/Z/W register class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;
  // ...

  return AluAny;
}
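// Example of the classification above: a def constrained to
// R600_TReg32_XRegClass can only be encoded in the X channel, so it is pinned
// to AluT_X, while a 128-bit (Reg128) def spans all four channels and takes
// the whole group. Anything left unconstrained falls through to AluAny and is
// bound to a free channel later by AttemptFillSlot/AssignSlot.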
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();
  // ...
  switch (Opcode) {
  // ...
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return IDAlu;
  default:
    return IDOther;
  }
}
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(SU->getInstr()->getOpcode()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    }
    InstructionsGroupCandidate.pop_back();
  }
  return nullptr;
}
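// The candidate vector acts as a trial transaction: the instruction is
// pushed, tested against the group-wide constant-read limit, and popped again
// either way. fitsConstReadLimitations models the KCache restriction that an
// instruction group may only read two channel pairs ([XY] or [ZW]) per bank
// on R700+; the !AnyALU clause additionally keeps vector-only instructions
// out of the trans slot, judging from the call site in pickAlu below.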
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}
void R600SchedStrategy::PrepareNextSlot() {
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  // Look up the instruction's dst operand; bail out if there is none.
  int DstIndex = TII->getOperandIdx(*MI, AMDGPU::OpName::dst);
  if (DstIndex == -1) {
    return;
  }
  // ... (constrain the dst register class to the channel's class)
}
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  // Prefer an instruction that is already pinned to this slot.
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  // Otherwise take an unconstrained one and bind it to the slot.
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}
unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // A T_XYZW instruction fills the whole vector group.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccuped = OccupedSlotsMask & 16;
    if (!TransSlotOccuped && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}
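// Slot-filling priority, as read from the loop above: PRED_X and discarded
// instructions each claim a whole group (mask 31); a T_XYZW instruction
// claims the four vector slots (mask 15); on VLIW5 targets the trans slot
// (bit 4, value 16) is tried next, first with trans-only instructions, then
// with any ALU that is not vector-only; finally the W..X channels are filled
// individually. When no slot can be filled, PrepareNextSlot starts a new
// group.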
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = nullptr;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty())
    MoveUnits(Pending[QID], AQ);
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;
}
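// Note: the available queue is refilled from Pending only once it runs dry,
// and AQ.back() plus resize() pops the most recently released unit first,
// matching the bottom-up release order used by releaseBottomNode.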