LLVM 20.0.0git
AMDGPUExportClustering.cpp
Go to the documentation of this file.
1//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a DAG scheduling mutation to cluster shader
10/// exports.
11//
12//===----------------------------------------------------------------------===//
13
16#include "SIInstrInfo.h"
18
19using namespace llvm;
20
21namespace {
22
23class ExportClustering : public ScheduleDAGMutation {
24public:
25 ExportClustering() = default;
26 void apply(ScheduleDAGInstrs *DAG) override;
27};
28
29static bool isExport(const SUnit &SU) {
30 return SIInstrInfo::isEXP(*SU.getInstr());
31}
32
33static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
34 const MachineInstr *MI = SU->getInstr();
35 unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
37}
38
39static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
40 unsigned PosCount) {
41 if (!PosCount || PosCount == Chain.size())
42 return;
43
44 // Position exports should occur as soon as possible in the shader
45 // for optimal performance. This moves position exports before
46 // other exports while preserving the order within different export
47 // types (pos or other).
49 unsigned PosIdx = 0;
50 unsigned OtherIdx = PosCount;
51 for (SUnit *SU : Copy) {
52 if (isPositionExport(TII, SU))
53 Chain[PosIdx++] = SU;
54 else
55 Chain[OtherIdx++] = SU;
56 }
57}
58
59static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
60 SUnit *ChainHead = Exports.front();
61
62 // Now construct cluster from chain by adding new edges.
63 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
64 SUnit *SUa = Exports[Idx];
65 SUnit *SUb = Exports[Idx + 1];
66
67 // Copy all dependencies to the head of the chain to avoid any
68 // computation being inserted into the chain.
69 for (const SDep &Pred : SUb->Preds) {
70 SUnit *PredSU = Pred.getSUnit();
71 if (!isExport(*PredSU) && !Pred.isWeak())
72 DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
73 }
74
75 // New barrier edge ordering exports
76 DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
77 // Also add cluster edge
78 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
79 }
80}
81
82static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
84
85 for (const SDep &Pred : SU.Preds) {
86 SUnit *PredSU = Pred.getSUnit();
87 if (Pred.isBarrier() && isExport(*PredSU)) {
88 ToRemove.push_back(Pred);
89 if (isExport(SU))
90 continue;
91
92 // If we remove a barrier we need to copy dependencies
93 // from the predecessor to maintain order.
94 for (const SDep &ExportPred : PredSU->Preds) {
95 SUnit *ExportPredSU = ExportPred.getSUnit();
96 if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
97 ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
98 }
99 }
100 }
101
102 for (SDep Pred : ToRemove)
103 SU.removePred(Pred);
104 for (SDep Pred : ToAdd)
105 DAG->addEdge(&SU, Pred);
106}
107
108void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
109 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
110
112
113 // Pass through DAG gathering a list of exports and removing barrier edges
114 // creating dependencies on exports. Freeing exports of successor edges
115 // allows more scheduling freedom, and nothing should be order dependent
116 // on exports. Edges will be added later to order the exports.
117 unsigned PosCount = 0;
118 for (SUnit &SU : DAG->SUnits) {
119 if (!isExport(SU))
120 continue;
121
122 Chain.push_back(&SU);
123 if (isPositionExport(TII, &SU))
124 PosCount++;
125
126 removeExportDependencies(DAG, SU);
127
128 SmallVector<SDep, 4> Succs(SU.Succs);
129 for (SDep Succ : Succs)
130 removeExportDependencies(DAG, *Succ.getSUnit());
131 }
132
133 // Apply clustering if there are multiple exports
134 if (Chain.size() > 1) {
135 sortChain(TII, Chain, PosCount);
136 buildCluster(Chain, DAG);
137 }
138}
139
140} // end namespace
141
142namespace llvm {
143
144std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
145 return std::make_unique<ExportClustering>();
146}
147
148} // end namespace llvm
Provides AMDGPU specific target descriptions.
ReachingDefAnalysis InstSet & ToRemove
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Interface definition for SIInstrInfo.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
Representation of each machine instruction.
Definition: MachineInstr.h:69
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
Definition: ScheduleDAG.h:174
static bool isEXP(const MachineInstr &MI)
Definition: SIInstrInfo.h:655
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
A ScheduleDAG for scheduling lists of MachineInstr.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Mutate the DAG as a postpass after normal DAG building.
virtual void apply(ScheduleDAGInstrs *DAG)=0
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()