LLVM 9.0.0svn
R600MachineScheduler.cpp
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600 Machine Scheduler interface
//
//===----------------------------------------------------------------------===//

#include "R600MachineScheduler.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
  DAG = static_cast<ScheduleDAGMILive*>(dag);
  const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  VLIW5 = !ST.hasCaymanISA();
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
  AluInstCount = 0;
  FetchInstCount = 0;
}
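
// Note on the counters initialized above (summary added for clarity; not a
// comment in the original file): CurEmitted counts instructions issued into
// the current clause, and pickNode() compares it against
// InstKindLimit[CurInstKind] to decide when the clause may be closed and the
// scheduler may switch to another instruction kind.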

void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst)
{
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}

static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
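
// Worked example (illustrative; not in the original file): if the pending
// fetch clause is estimated to need 8 GPRs, getWFCountLimitedByGPR(8) == 31,
// i.e. at most 31 wavefronts fit in the 248-register budget assumed above.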

SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
                          (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
                            (!Available[IDFetch].empty() ||
                             !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // We use the heuristic provided by the AMD Accelerated Parallel Processing
    // OpenCL Programming Guide:
    // The approximate number of WF that allows TEX instructions to hide ALU
    // instructions is:
    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
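    // Behavior note (added; not an original comment): all three terms above
    // are unsigned integers, so the division truncates before the conversion
    // to float; a true ratio below 1 collapses to 0.0f, which the test below
    // treats as fetch-bound.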
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      // We assume the local GPR requirements to be "dominated" by the
      // requirement of the TEX clause (which consumes 128-bit regs); ALU
      // instructions before and after TEX are indeed likely to consume or
      // generate values from/for the TEX clause.
      // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
      // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
      // one GPR) or TmXYZW = TnXYZW (need 2 GPRs).
      // (TODO: use RegisterPressure)
      // If we are going to use too many GPRs, we flush fetch instructions to
      // lower register pressure on 128-bit regs.
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
              (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // try to pick ALU
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // try to pick FETCH
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // try to pick other
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  LLVM_DEBUG(if (SU) {
    dbgs() << " ** Pick node **\n";
    DAG->dumpNode(*SU);
  } else {
    dbgs() << "NO NODE \n";
    for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
      const SUnit &S = DAG->SUnits[i];
      if (!S.isScheduled)
        DAG->dumpNode(S);
    }
  });

  return SU;
}

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    AluInstCount++;
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
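      // Note (rationale inferred; not an original comment): each
      // ALU_LITERAL_X operand counted below occupies its own literal slot in
      // the ALU clause, so it is charged to CurEmitted like an instruction.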
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  } else
    FetchInstCount++;
}

static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != R600::COPY)
    return false;

  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // There is no export clause; we can schedule one as soon as it's ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  if (TII->isTransOnly(*MI))
    return AluTrans;

  switch (MI->getOpcode()) {
  case R600::PRED_X:
    return AluPredX;
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
  case R600::DOT_4:
    return AluT_XYZW;
  case R600::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it in scheduling.
      return AluDiscarded;
    }
    break;
  default:
    break;
  }

  // Does the instruction take a whole IG?
  // XXX: Is it possible to add a helper function in R600InstrInfo that can
  // be used here and in R600PacketizerList::isSoloInstruction()?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == R600::GROUP_BARRIER) {
    return AluT_XYZW;
  }

  if (TII->isLDSInstr(MI->getOpcode())) {
    return AluT_X;
  }

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case R600::sub0:
    return AluT_X;
  case R600::sub1:
    return AluT_Y;
  case R600::sub2:
    return AluT_Z;
  case R600::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
    return AluT_XYZW;

  // LDS src registers cannot be used in the Trans slot.
  if (TII->readsLDSSrcReg(*MI))
    return AluT_XYZW;

  return AluAny;
}

int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case R600::PRED_X:
  case R600::COPY:
  case R600::CONST_COPY:
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
  case R600::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}

SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return nullptr;
}
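
// Note (added for clarity; not in the original file): PopInst scans Q from
// the back and tentatively appends each candidate to
// InstructionsGroupCandidate so that fitsConstReadLimitations() can check the
// whole group's constant-cache reads before the candidate is committed.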

void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  LLVM_DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
//  if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
//    OccupedSlotsMask |= 16;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
  if (DstIndex == -1) {
    return;
  }
  unsigned DestReg = MI->getOperand(DstIndex).getReg();
  // PressureRegister crashes if an operand is def and used in the same inst
  // and we try to constrain its regclass.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == DestReg)
      return;
  }
  // Constrain the regclass of DestReg so that it is assigned to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
    break;
  }
}

SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
      AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
      AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
      AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
      AvailableAlus[AluPredX].size();
}

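// Note (inferred from the uses below; not an original comment):
// OccupedSlotsMask models the five VLIW slots of the instruction group being
// formed: bits 0-3 are the X/Y/Z/W vector slots and bit 4 is the Trans slot,
// so 15 fills the four vector slots and 31 fills all five.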
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccuped = OccupedSlotsMask & 16;
    if (!TransSlotOccuped && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}

SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = nullptr;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.pop_back();
  }
  return SU;
}
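
This file defines only the strategy; hooking it into the machine scheduler
happens elsewhere. As a sketch of typical usage (mirroring the registration in
AMDGPUTargetMachine.cpp in the same tree; llvm::make_unique is the LLVM 9
stand-in for std::make_unique):

  #include "R600MachineScheduler.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineScheduler.h"

  using namespace llvm;

  // Wrap the strategy in a ScheduleDAGMILive, which provides the vreg
  // liveness that R600SchedStrategy::initialize() asserts on.
  static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
    return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
  }

  // Make the scheduler selectable via -misched=r600.
  static MachineSchedRegistry
  R600SchedRegistry("r600", "Run R600's custom scheduler",
                    createR600MachineScheduler);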