20#define DEBUG_TYPE "machine-scheduler"
28 VLIW5 = !ST.hasCaymanISA();
30 CurInstKind = IDOther;
32 OccupiedSlotsMask = 31;
33 InstKindLimit[IDAlu] =
TII->getMaxAlusPerClause();
34 InstKindLimit[IDOther] = 32;
35 InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
40void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
41 std::vector<SUnit *> &QDst)
48 assert (GPRCount &&
"GPRCount cannot be 0");
49 return 248 / GPRCount;
54 NextInstKind = IDOther;
59 bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
60 (Available[CurInstKind].empty());
61 bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
62 (!Available[IDFetch].empty() || !Available[IDOther].empty());
64 if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
69 float ALUFetchRationEstimate =
70 (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
71 (FetchInstCount + Available[IDFetch].
size());
72 if (ALUFetchRationEstimate == 0) {
73 AllowSwitchFromAlu =
true;
75 unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
76 LLVM_DEBUG(
dbgs() << NeededWF <<
" approx. Wavefronts Required\n");
87 unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
89 AllowSwitchFromAlu =
true;
93 if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
94 (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
97 if (!SU && !PhysicalRegCopy.empty()) {
98 SU = PhysicalRegCopy.front();
99 PhysicalRegCopy.erase(PhysicalRegCopy.begin());
102 if (CurEmitted >= InstKindLimit[IDAlu])
104 NextInstKind = IDAlu;
110 SU = pickOther(IDFetch);
112 NextInstKind = IDFetch;
117 SU = pickOther(IDOther);
119 NextInstKind = IDOther;
123 dbgs() <<
" ** Pick node **\n";
126 dbgs() <<
"NO NODE \n";
136 if (NextInstKind != CurInstKind) {
138 if (NextInstKind != IDAlu)
139 OccupiedSlotsMask |= 31;
141 CurInstKind = NextInstKind;
144 if (CurInstKind == IDAlu) {
146 switch (getAluKind(SU)) {
157 if (MO.
isReg() && MO.
getReg() == R600::ALU_LITERAL_X)
166 LLVM_DEBUG(
dbgs() << CurEmitted <<
" Instructions Emitted in this clause\n");
168 if (CurInstKind != IDFetch) {
169 MoveUnits(Pending[IDFetch], Available[IDFetch]);
176 if (
MI->getOpcode() != R600::COPY)
179 return !
MI->getOperand(1).getReg().isVirtual();
189 PhysicalRegCopy.push_back(SU);
193 int IK = getInstKind(SU);
197 Available[IDOther].push_back(SU);
199 Pending[IK].push_back(SU);
203bool R600SchedStrategy::regBelongsToClass(
Register Reg,
205 if (!Reg.isVirtual())
210R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(
SUnit *SU)
const {
216 switch (
MI->getOpcode()) {
219 case R600::INTERP_PAIR_XY:
220 case R600::INTERP_PAIR_ZW:
221 case R600::INTERP_VEC_LOAD:
225 if (
MI->getOperand(1).isUndef()) {
240 MI->getOpcode() == R600::GROUP_BARRIER) {
249 unsigned DestSubReg =
MI->getOperand(0).getSubReg();
250 switch (DestSubReg) {
264 Register DestReg =
MI->getOperand(0).getReg();
265 if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
266 regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
268 if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
270 if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
272 if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
274 if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
284int R600SchedStrategy::getInstKind(
SUnit* SU) {
297 case R600::CONST_COPY:
298 case R600::INTERP_PAIR_XY:
299 case R600::INTERP_PAIR_ZW:
300 case R600::INTERP_VEC_LOAD:
308SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q,
bool AnyALU) {
311 for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
314 InstructionsGroupCandidate.push_back(SU->
getInstr());
317 InstructionsGroupCandidate.pop_back();
318 Q.erase((It + 1).base());
321 InstructionsGroupCandidate.pop_back();
326void R600SchedStrategy::LoadAlu() {
327 std::vector<SUnit *> &QSrc = Pending[IDAlu];
328 for (
SUnit *SU : QSrc) {
329 AluKind AK = getAluKind(SU);
330 AvailableAlus[AK].push_back(SU);
335void R600SchedStrategy::PrepareNextSlot() {
337 assert(OccupiedSlotsMask &&
"Slot wasn't filled");
338 OccupiedSlotsMask = 0;
341 InstructionsGroupCandidate.clear();
345void R600SchedStrategy::AssignSlot(
MachineInstr*
MI,
unsigned Slot) {
347 if (DstIndex == -1) {
350 Register DestReg =
MI->getOperand(DstIndex).getReg();
354 if (MO.getReg() == DestReg)
373SUnit *R600SchedStrategy::AttemptFillSlot(
unsigned Slot,
bool AnyAlu) {
374 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
375 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
378 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
380 AssignSlot(UnslotedSU->
getInstr(), Slot);
384unsigned R600SchedStrategy::AvailablesAluCount()
const {
385 return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
386 AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
387 AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
388 AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
389 AvailableAlus[AluPredX].size();
392SUnit* R600SchedStrategy::pickAlu() {
393 while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
394 if (!OccupiedSlotsMask) {
396 if (!AvailableAlus[AluPredX].empty()) {
397 OccupiedSlotsMask |= 31;
398 return PopInst(AvailableAlus[AluPredX],
false);
401 if (!AvailableAlus[AluDiscarded].empty()) {
402 OccupiedSlotsMask |= 31;
403 return PopInst(AvailableAlus[AluDiscarded],
false);
406 if (!AvailableAlus[AluT_XYZW].empty()) {
407 OccupiedSlotsMask |= 15;
408 return PopInst(AvailableAlus[AluT_XYZW],
false);
411 bool TransSlotOccupied = OccupiedSlotsMask & 16;
412 if (!TransSlotOccupied && VLIW5) {
413 if (!AvailableAlus[AluTrans].empty()) {
414 OccupiedSlotsMask |= 16;
415 return PopInst(AvailableAlus[AluTrans],
false);
417 SUnit *SU = AttemptFillSlot(3,
true);
419 OccupiedSlotsMask |= 16;
423 for (
int Chan = 3; Chan > -1; --Chan) {
424 bool isOccupied = OccupiedSlotsMask & (1 << Chan);
426 SUnit *SU = AttemptFillSlot(Chan,
false);
428 OccupiedSlotsMask |= (1 << Chan);
429 InstructionsGroupCandidate.push_back(SU->
getInstr());
439SUnit* R600SchedStrategy::pickOther(
int QID) {
441 std::vector<SUnit *> &AQ = Available[QID];
444 MoveUnits(Pending[QID], AQ);
const R600InstrInfo * TII
Provides R600 specific target descriptions.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount)
static bool isPhysicalRegCopy(MachineInstr *MI)
R600 Machine Scheduler interface.
AMDGPU R600 specific subclass of TargetSubtarget.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mop_iterator operands_end()
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
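constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class, but only if the new class has at least MinNumRegs registers.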
bool usesVertexCache(unsigned Opcode) const
bool usesTextureCache(unsigned Opcode) const
bool fitsConstReadLimitations(const std::vector< MachineInstr * > &) const
An instruction group can only access 2 channel pairs (either [XY] or [ZW]) from KCache bank on R700+.
bool isVector(const MachineInstr &MI) const
Vector instructions are instructions that must fill all instruction slots within an instruction group.
bool isTransOnly(unsigned Opcode) const
bool isReductionOp(unsigned opcode) const
bool isCubeOp(unsigned opcode) const
bool isLDSInstr(unsigned Opcode) const
bool readsLDSSrcReg(const MachineInstr &MI) const
bool isALUInstr(unsigned Opcode) const
bool isVectorOnly(unsigned Opcode) const
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void releaseBottomNode(SUnit *SU) override
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
void releaseTopNode(SUnit *SU) override
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the DAG nodes.
Wrapper class representing virtual and physical registers.
Scheduling unit. This is a node in the scheduling DAG.
bool isScheduled
True once scheduled.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void dumpNode(const SUnit &SU) const override
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking regpressure.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.