20#define DEBUG_TYPE "machine-scheduler"
28 VLIW5 = !ST.hasCaymanISA();
30 CurInstKind = IDOther;
32 OccupiedSlotsMask = 31;
33 InstKindLimit[IDAlu] =
TII->getMaxAlusPerClause();
34 InstKindLimit[IDOther] = 32;
35 InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
40void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
41 std::vector<SUnit *> &QDst)
48 assert (GPRCount &&
"GPRCount cannot be 0");
49 return 248 / GPRCount;
54 NextInstKind = IDOther;
59 bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
60 (Available[CurInstKind].empty());
61 bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
62 (!Available[IDFetch].empty() || !Available[IDOther].empty());
64 if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
69 float ALUFetchRationEstimate =
70 (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
71 (FetchInstCount + Available[IDFetch].
size());
72 if (ALUFetchRationEstimate == 0) {
73 AllowSwitchFromAlu =
true;
75 unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
76 LLVM_DEBUG(
dbgs() << NeededWF <<
" approx. Wavefronts Required\n");
87 unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
89 AllowSwitchFromAlu =
true;
93 if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
94 (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
97 if (!SU && !PhysicalRegCopy.empty()) {
98 SU = PhysicalRegCopy.front();
99 PhysicalRegCopy.erase(PhysicalRegCopy.begin());
102 if (CurEmitted >= InstKindLimit[IDAlu])
104 NextInstKind = IDAlu;
110 SU = pickOther(IDFetch);
112 NextInstKind = IDFetch;
117 SU = pickOther(IDOther);
119 NextInstKind = IDOther;
123 dbgs() <<
" ** Pick node **\n";
126 dbgs() <<
"NO NODE \n";
136 if (NextInstKind != CurInstKind) {
138 if (NextInstKind != IDAlu)
139 OccupiedSlotsMask |= 31;
141 CurInstKind = NextInstKind;
144 if (CurInstKind == IDAlu) {
146 switch (getAluKind(SU)) {
157 if (MO.
isReg() && MO.
getReg() == R600::ALU_LITERAL_X)
166 LLVM_DEBUG(
dbgs() << CurEmitted <<
" Instructions Emitted in this clause\n");
168 if (CurInstKind != IDFetch) {
169 MoveUnits(Pending[IDFetch], Available[IDFetch]);
176 if (
MI->getOpcode() != R600::COPY)
179 return !
MI->getOperand(1).getReg().isVirtual();
189 PhysicalRegCopy.push_back(SU);
193 int IK = getInstKind(SU);
197 Available[IDOther].push_back(SU);
199 Pending[IK].push_back(SU);
203bool R600SchedStrategy::regBelongsToClass(
Register Reg,
205 if (!Reg.isVirtual()) {
212R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(
SUnit *SU)
const {
218 switch (
MI->getOpcode()) {
221 case R600::INTERP_PAIR_XY:
222 case R600::INTERP_PAIR_ZW:
223 case R600::INTERP_VEC_LOAD:
227 if (
MI->getOperand(1).isUndef()) {
242 MI->getOpcode() == R600::GROUP_BARRIER) {
251 unsigned DestSubReg =
MI->getOperand(0).getSubReg();
252 switch (DestSubReg) {
266 Register DestReg =
MI->getOperand(0).getReg();
267 if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
268 regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
270 if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
272 if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
274 if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
276 if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
286int R600SchedStrategy::getInstKind(
SUnit* SU) {
299 case R600::CONST_COPY:
300 case R600::INTERP_PAIR_XY:
301 case R600::INTERP_PAIR_ZW:
302 case R600::INTERP_VEC_LOAD:
310SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q,
bool AnyALU) {
313 for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(),
E = Q.rend();
316 InstructionsGroupCandidate.push_back(SU->
getInstr());
319 InstructionsGroupCandidate.pop_back();
320 Q.erase((It + 1).base());
323 InstructionsGroupCandidate.pop_back();
329void R600SchedStrategy::LoadAlu() {
330 std::vector<SUnit *> &QSrc = Pending[IDAlu];
331 for (
SUnit *SU : QSrc) {
332 AluKind AK = getAluKind(SU);
333 AvailableAlus[AK].push_back(SU);
338void R600SchedStrategy::PrepareNextSlot() {
340 assert(OccupiedSlotsMask &&
"Slot wasn't filled");
341 OccupiedSlotsMask = 0;
344 InstructionsGroupCandidate.clear();
348void R600SchedStrategy::AssignSlot(
MachineInstr*
MI,
unsigned Slot) {
350 if (DstIndex == -1) {
353 Register DestReg =
MI->getOperand(DstIndex).getReg();
357 E =
MI->operands_end(); It !=
E; ++It) {
380SUnit *R600SchedStrategy::AttemptFillSlot(
unsigned Slot,
bool AnyAlu) {
381 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
382 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
385 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
387 AssignSlot(UnslotedSU->
getInstr(), Slot);
391unsigned R600SchedStrategy::AvailablesAluCount()
const {
392 return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
393 AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
394 AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
395 AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
396 AvailableAlus[AluPredX].size();
399SUnit* R600SchedStrategy::pickAlu() {
400 while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
401 if (!OccupiedSlotsMask) {
403 if (!AvailableAlus[AluPredX].empty()) {
404 OccupiedSlotsMask |= 31;
405 return PopInst(AvailableAlus[AluPredX],
false);
408 if (!AvailableAlus[AluDiscarded].empty()) {
409 OccupiedSlotsMask |= 31;
410 return PopInst(AvailableAlus[AluDiscarded],
false);
413 if (!AvailableAlus[AluT_XYZW].empty()) {
414 OccupiedSlotsMask |= 15;
415 return PopInst(AvailableAlus[AluT_XYZW],
false);
418 bool TransSlotOccupied = OccupiedSlotsMask & 16;
419 if (!TransSlotOccupied && VLIW5) {
420 if (!AvailableAlus[AluTrans].empty()) {
421 OccupiedSlotsMask |= 16;
422 return PopInst(AvailableAlus[AluTrans],
false);
424 SUnit *SU = AttemptFillSlot(3,
true);
426 OccupiedSlotsMask |= 16;
430 for (
int Chan = 3; Chan > -1; --Chan) {
431 bool isOccupied = OccupiedSlotsMask & (1 << Chan);
433 SUnit *SU = AttemptFillSlot(Chan,
false);
435 OccupiedSlotsMask |= (1 << Chan);
436 InstructionsGroupCandidate.push_back(SU->
getInstr());
446SUnit* R600SchedStrategy::pickOther(
int QID) {
448 std::vector<SUnit *> &AQ = Available[QID];
451 MoveUnits(Pending[QID], AQ);
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
Provides R600 specific target descriptions.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount)
static bool isPhysicalRegCopy(MachineInstr *MI)
R600 Machine Scheduler interface.
AMDGPU R600 specific subclass of TargetSubtarget.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr uint32_t Opcode
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mop_iterator operands_end()
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
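constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class, but only if the new class has at least MinNumRegs registers.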
bool usesVertexCache(unsigned Opcode) const
bool usesTextureCache(unsigned Opcode) const
bool fitsConstReadLimitations(const std::vector< MachineInstr * > &) const
An instruction group can only access two channel pairs (either [XY] or [ZW]) from a KCache bank on R700+.
bool isVector(const MachineInstr &MI) const
Vector instructions are instructions that must fill all instruction slots within an instruction group.
bool isTransOnly(unsigned Opcode) const
bool isReductionOp(unsigned opcode) const
bool isCubeOp(unsigned opcode) const
bool isLDSInstr(unsigned Opcode) const
bool readsLDSSrcReg(const MachineInstr &MI) const
bool isALUInstr(unsigned Opcode) const
bool isVectorOnly(unsigned Opcode) const
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void releaseBottomNode(SUnit *SU) override
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
void releaseTopNode(SUnit *SU) override
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the DAG nodes.
Wrapper class representing virtual and physical registers.
Scheduling unit. This is a node in the scheduling DAG.
bool isScheduled
True once scheduled.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void dumpNode(const SUnit &SU) const override
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking regpressure.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.