29 #define DEBUG_TYPE "machine-scheduler"
34 EnableIGroupLP(
"amdgpu-igrouplp",
35 cl::desc(
"Enable construction of Instruction Groups and "
36 "their ordering for scheduling"),
40 VMEMGroupMaxSize(
"amdgpu-igrouplp-vmem-group-size",
cl::init(
None),
42 cl::desc(
"The maximum number of instructions to include "
46 MFMAGroupMaxSize(
"amdgpu-igrouplp-mfma-group-size",
cl::init(
None),
48 cl::desc(
"The maximum number of instructions to include "
52 LDRGroupMaxSize(
"amdgpu-igrouplp-ldr-group-size",
cl::init(
None),
54 cl::desc(
"The maximum number of instructions to include "
55 "in lds/gds read group."));
58 LDWGroupMaxSize(
"amdgpu-igrouplp-ldw-group-size",
cl::init(
None),
60 cl::desc(
"The maximum number of instructions to include "
61 "in lds/gds write group."));
73 const CanAddMIFn canAddMI;
87 <<
"from: SU(" << A->NodeNum <<
") " << *A->getInstr()
88 <<
"to: SU(" <<
B->NodeNum <<
") " << *
B->getInstr());
96 void link(
SUnit &SU,
bool MakePred =
false) {
97 for (
auto A : Collection) {
110 for (
auto A : Collection) {
121 void link(SchedGroup &OtherGroup) {
122 for (
auto B : OtherGroup.Collection)
127 bool isFull() {
return MaxSize && Collection.size() >= *MaxSize; }
135 if (
MI.getOpcode() != TargetOpcode::BUNDLE)
136 return canAddMI(
MI,
TII);
141 while (
E !=
MBB->
end() &&
E->isBundledWithPred())
149 void add(
SUnit &SU) { Collection.push_back(&SU); }
153 : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
157 return TII->isMFMA(
MI);
165 return TII->isSALU(
MI);
173 return MI.mayLoad() &&
178 return MI.mayStore() &&
183 return MI.mayStore() &&
TII->isDS(
MI);
187 return MI.mayLoad() &&
TII->isDS(
MI);
195 IGroupLPDAGMutation() =
default;
211 enum class SchedBarrierMasks {
219 VMEM_WRITE = 1u << 6,
229 std::unique_ptr<SchedGroup> MFMASchedGroup =
nullptr;
230 std::unique_ptr<SchedGroup> VALUSchedGroup =
nullptr;
231 std::unique_ptr<SchedGroup> SALUSchedGroup =
nullptr;
232 std::unique_ptr<SchedGroup> VMEMReadSchedGroup =
nullptr;
233 std::unique_ptr<SchedGroup> VMEMWriteSchedGroup =
nullptr;
234 std::unique_ptr<SchedGroup> DSWriteSchedGroup =
nullptr;
235 std::unique_ptr<SchedGroup> DSReadSchedGroup =
nullptr;
239 void getSchedGroupsFromMask(int32_t
Mask,
243 void addSchedBarrierEdges(
SUnit &SU);
246 void initSchedGroup(SchedGroup *SG);
249 void resetSchedBarrierEdges(
SUnit &SU);
254 SchedBarrierDAGMutation() =
default;
259 TII =
ST.getInstrInfo();
262 if (!TSchedModel || DAG->SUnits.empty())
272 SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG),
273 SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG),
274 SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG),
275 SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)};
277 for (
SUnit &SU : DAG->SUnits) {
279 for (
auto &SG : PipelineOrderGroups)
280 if (SG.canAddSU(SU,
TII))
284 for (
unsigned i = 0;
i < PipelineOrderGroups.size() - 1;
i++) {
285 auto &GroupA = PipelineOrderGroups[
i];
286 for (
unsigned j =
i + 1;
j < PipelineOrderGroups.size();
j++) {
287 auto &GroupB = PipelineOrderGroups[
j];
295 if (!TSchedModel || DAGInstrs->
SUnits.empty())
301 TII =
ST.getInstrInfo();
303 for (
auto &SU : DAG->SUnits)
304 if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
305 addSchedBarrierEdges(SU);
308 void SchedBarrierDAGMutation::addSchedBarrierEdges(
SUnit &SchedBarrier) {
310 assert(
MI.getOpcode() == AMDGPU::SCHED_BARRIER);
313 resetSchedBarrierEdges(SchedBarrier);
315 int32_t
Mask =
MI.getOperand(0).getImm();
316 getSchedGroupsFromMask(
Mask, SchedGroups);
317 for (
auto SG : SchedGroups)
321 return A->NodeNum >
B->NodeNum;
325 void SchedBarrierDAGMutation::getSchedGroupsFromMask(
327 SchedBarrierMasks SBMask = (SchedBarrierMasks)
Mask;
333 if (!VALUSchedGroup) {
334 VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember,
None, DAG);
335 initSchedGroup(VALUSchedGroup.get());
338 SchedGroups.push_back(VALUSchedGroup.get());
343 if (!SALUSchedGroup) {
344 SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember,
None, DAG);
345 initSchedGroup(SALUSchedGroup.get());
348 SchedGroups.push_back(SALUSchedGroup.get());
353 if (!MFMASchedGroup) {
354 MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember,
None, DAG);
355 initSchedGroup(MFMASchedGroup.get());
358 SchedGroups.push_back(MFMASchedGroup.get());
363 if (!VMEMReadSchedGroup) {
365 std::make_unique<SchedGroup>(isVMEMReadSGMember,
None, DAG);
366 initSchedGroup(VMEMReadSchedGroup.get());
369 SchedGroups.push_back(VMEMReadSchedGroup.get());
374 if (!VMEMWriteSchedGroup) {
375 VMEMWriteSchedGroup =
376 std::make_unique<SchedGroup>(isVMEMWriteSGMember,
None, DAG);
377 initSchedGroup(VMEMWriteSchedGroup.get());
380 SchedGroups.push_back(VMEMWriteSchedGroup.get());
385 if (!DSReadSchedGroup) {
387 std::make_unique<SchedGroup>(isDSReadSGMember,
None, DAG);
388 initSchedGroup(DSReadSchedGroup.get());
391 SchedGroups.push_back(DSReadSchedGroup.get());
396 if (!DSWriteSchedGroup) {
398 std::make_unique<SchedGroup>(isDSWriteSGMember,
None, DAG);
399 initSchedGroup(DSWriteSchedGroup.get());
402 SchedGroups.push_back(DSWriteSchedGroup.get());
// Seed a scheduling group by scanning every scheduling unit in the DAG and
// offering each one to the group's admission check (canAddSU).
406 void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) {
408 for (
auto &SU : DAG->SUnits)
// Units passing the check become group members.
// NOTE(review): the loop body (presumably SG->add(SU)) and the closing
// brace are not visible in this excerpt — confirm against the full file.
409 if (SG->canAddSU(SU,
TII))
// Remove the DAG edges between the SCHED_BARRIER node and its successors;
// addSchedBarrierEdges calls this before installing new ordering edges
// derived from the barrier's mask.
413 void SchedBarrierDAGMutation::resetSchedBarrierEdges(
SUnit &SU) {
// Walk each successor of the barrier ...
418 for (
auto &
S : SU.
Succs) {
// ... and scan that successor's predecessor list for the record that
// points back at the barrier.
419 for (
auto &SP :
S.getSUnit()->Preds) {
420 if (SP.getSUnit() == &SU) {
// Drop the matching predecessor edge.
// NOTE(review): removePred() mutates the Preds list being iterated by the
// inner loop — confirm iteration remains valid after erasure (consider
// breaking out of the inner loop once the edge has been removed).
421 S.getSUnit()->removePred(SP);
432 return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() :
nullptr;
436 return std::make_unique<SchedBarrierDAGMutation>();