20#define DEBUG_TYPE "machine-scheduler"
28 VLIW5 = !ST.hasCaymanISA();
30 CurInstKind = IDOther;
32 OccupiedSlotsMask = 31;
33 InstKindLimit[IDAlu] =
TII->getMaxAlusPerClause();
34 InstKindLimit[IDOther] = 32;
35 InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
40void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
41 std::vector<SUnit *> &QDst)
48 assert (GPRCount &&
"GPRCount cannot be 0");
49 return 248 / GPRCount;
54 NextInstKind = IDOther;
59 bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
60 (Available[CurInstKind].empty());
61 bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
62 (!Available[IDFetch].empty() || !Available[IDOther].empty());
64 if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
69 float ALUFetchRationEstimate =
70 (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
71 (FetchInstCount + Available[IDFetch].
size());
72 if (ALUFetchRationEstimate == 0) {
73 AllowSwitchFromAlu =
true;
75 unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
76 LLVM_DEBUG(
dbgs() << NeededWF <<
" approx. Wavefronts Required\n");
87 unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
89 AllowSwitchFromAlu =
true;
93 if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
94 (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
97 if (!SU && !PhysicalRegCopy.empty()) {
98 SU = PhysicalRegCopy.front();
99 PhysicalRegCopy.erase(PhysicalRegCopy.begin());
102 if (CurEmitted >= InstKindLimit[IDAlu])
104 NextInstKind = IDAlu;
110 SU = pickOther(IDFetch);
112 NextInstKind = IDFetch;
117 SU = pickOther(IDOther);
119 NextInstKind = IDOther;
123 dbgs() <<
" ** Pick node **\n";
126 dbgs() <<
"NO NODE \n";
136 if (NextInstKind != CurInstKind) {
138 if (NextInstKind != IDAlu)
139 OccupiedSlotsMask |= 31;
141 CurInstKind = NextInstKind;
144 if (CurInstKind == IDAlu) {
146 switch (getAluKind(SU)) {
157 if (MO.
isReg() && MO.
getReg() == R600::ALU_LITERAL_X)
166 LLVM_DEBUG(
dbgs() << CurEmitted <<
" Instructions Emitted in this clause\n");
168 if (CurInstKind != IDFetch) {
169 MoveUnits(Pending[IDFetch], Available[IDFetch]);
176 if (
MI->getOpcode() != R600::COPY)
179 return !
MI->getOperand(1).getReg().isVirtual();
189 PhysicalRegCopy.push_back(SU);
193 int IK = getInstKind(SU);
197 Available[IDOther].push_back(SU);
199 Pending[IK].push_back(SU);
203bool R600SchedStrategy::regBelongsToClass(
Register Reg,
205 if (!Reg.isVirtual())
210R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(
SUnit *SU)
const {
216 switch (
MI->getOpcode()) {
219 case R600::INTERP_PAIR_XY:
220 case R600::INTERP_PAIR_ZW:
221 case R600::INTERP_VEC_LOAD:
225 if (
MI->getOperand(1).isUndef()) {
240 MI->getOpcode() == R600::GROUP_BARRIER) {
249 unsigned DestSubReg =
MI->getOperand(0).getSubReg();
250 switch (DestSubReg) {
264 Register DestReg =
MI->getOperand(0).getReg();
265 if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
266 regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
268 if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
270 if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
272 if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
274 if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
284int R600SchedStrategy::getInstKind(
SUnit* SU) {
297 case R600::CONST_COPY:
298 case R600::INTERP_PAIR_XY:
299 case R600::INTERP_PAIR_ZW:
300 case R600::INTERP_VEC_LOAD:
308SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q,
bool AnyALU) {
311 for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
314 InstructionsGroupCandidate.push_back(SU->
getInstr());
317 InstructionsGroupCandidate.pop_back();
318 Q.erase((It + 1).base());
321 InstructionsGroupCandidate.pop_back();
326void R600SchedStrategy::LoadAlu() {
327 std::vector<SUnit *> &QSrc = Pending[IDAlu];
328 for (
SUnit *SU : QSrc) {
329 AluKind AK = getAluKind(SU);
330 AvailableAlus[AK].push_back(SU);
335void R600SchedStrategy::PrepareNextSlot() {
337 assert(OccupiedSlotsMask &&
"Slot wasn't filled");
338 OccupiedSlotsMask = 0;
341 InstructionsGroupCandidate.clear();
345void R600SchedStrategy::AssignSlot(
MachineInstr*
MI,
unsigned Slot) {
347 if (DstIndex == -1) {
350 Register DestReg =
MI->getOperand(DstIndex).getReg();
354 E =
MI->operands_end(); It != E; ++It) {
377SUnit *R600SchedStrategy::AttemptFillSlot(
unsigned Slot,
bool AnyAlu) {
378 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
379 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
382 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
384 AssignSlot(UnslotedSU->
getInstr(), Slot);
388unsigned R600SchedStrategy::AvailablesAluCount()
const {
389 return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
390 AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
391 AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
392 AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
393 AvailableAlus[AluPredX].size();
396SUnit* R600SchedStrategy::pickAlu() {
397 while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
398 if (!OccupiedSlotsMask) {
400 if (!AvailableAlus[AluPredX].empty()) {
401 OccupiedSlotsMask |= 31;
402 return PopInst(AvailableAlus[AluPredX],
false);
405 if (!AvailableAlus[AluDiscarded].empty()) {
406 OccupiedSlotsMask |= 31;
407 return PopInst(AvailableAlus[AluDiscarded],
false);
410 if (!AvailableAlus[AluT_XYZW].empty()) {
411 OccupiedSlotsMask |= 15;
412 return PopInst(AvailableAlus[AluT_XYZW],
false);
415 bool TransSlotOccupied = OccupiedSlotsMask & 16;
416 if (!TransSlotOccupied && VLIW5) {
417 if (!AvailableAlus[AluTrans].empty()) {
418 OccupiedSlotsMask |= 16;
419 return PopInst(AvailableAlus[AluTrans],
false);
421 SUnit *SU = AttemptFillSlot(3,
true);
423 OccupiedSlotsMask |= 16;
427 for (
int Chan = 3; Chan > -1; --Chan) {
428 bool isOccupied = OccupiedSlotsMask & (1 << Chan);
430 SUnit *SU = AttemptFillSlot(Chan,
false);
432 OccupiedSlotsMask |= (1 << Chan);
433 InstructionsGroupCandidate.push_back(SU->
getInstr());
443SUnit* R600SchedStrategy::pickOther(
int QID) {
445 std::vector<SUnit *> &AQ = Available[QID];
448 MoveUnits(Pending[QID], AQ);
const HexagonInstrInfo * TII
Provides R600 specific target descriptions.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount)
static bool isPhysicalRegCopy(MachineInstr *MI)
R600 Machine Scheduler interface.
AMDGPU R600 specific subclass of TargetSubtarget.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mop_iterator operands_end()
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
bool usesVertexCache(unsigned Opcode) const
bool usesTextureCache(unsigned Opcode) const
bool fitsConstReadLimitations(const std::vector< MachineInstr * > &) const
An instruction group can only access 2 channel pair (either [XY] or [ZW]) from KCache bank on R700+.
bool isVector(const MachineInstr &MI) const
Vector instructions are instructions that must fill all instruction slots within an instruction group...
bool isTransOnly(unsigned Opcode) const
bool isReductionOp(unsigned opcode) const
bool isCubeOp(unsigned opcode) const
bool isLDSInstr(unsigned Opcode) const
bool readsLDSSrcReg(const MachineInstr &MI) const
bool isALUInstr(unsigned Opcode) const
bool isVectorOnly(unsigned Opcode) const
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void releaseBottomNode(SUnit *SU) override
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
void releaseTopNode(SUnit *SU) override
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Wrapper class representing virtual and physical registers.
Scheduling unit. This is a node in the scheduling DAG.
bool isScheduled
True once scheduled.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void dumpNode(const SUnit &SU) const override
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.