LLVM  14.0.0git
AMDGPUExportClustering.cpp
Go to the documentation of this file.
1 //===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains a DAG scheduling mutation to cluster shader
10 /// exports.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUExportClustering.h"
16 #include "SIInstrInfo.h"
18 
19 using namespace llvm;
20 
21 namespace {
22 
23 class ExportClustering : public ScheduleDAGMutation {
24 public:
25  ExportClustering() {}
26  void apply(ScheduleDAGInstrs *DAG) override;
27 };
28 
29 static bool isExport(const SUnit &SU) {
30  return SIInstrInfo::isEXP(*SU.getInstr());
31 }
32 
33 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
34  const MachineInstr *MI = SU->getInstr();
35  unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
36  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
37 }
38 
39 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
40  unsigned PosCount) {
41  if (!PosCount || PosCount == Chain.size())
42  return;
43 
44  // Position exports should occur as soon as possible in the shader
45  // for optimal performance. This moves position exports before
46  // other exports while preserving the order within different export
47  // types (pos or other).
49  unsigned PosIdx = 0;
50  unsigned OtherIdx = PosCount;
51  for (SUnit *SU : Copy) {
52  if (isPositionExport(TII, SU))
53  Chain[PosIdx++] = SU;
54  else
55  Chain[OtherIdx++] = SU;
56  }
57 }
58 
59 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
60  SUnit *ChainHead = Exports.front();
61 
62  // Now construct cluster from chain by adding new edges.
63  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
64  SUnit *SUa = Exports[Idx];
65  SUnit *SUb = Exports[Idx + 1];
66 
67  // Copy all dependencies to the head of the chain to avoid any
68  // computation being inserted into the chain.
69  for (const SDep &Pred : SUb->Preds) {
70  SUnit *PredSU = Pred.getSUnit();
71  if (!isExport(*PredSU) && !Pred.isWeak())
72  DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
73  }
74 
75  // New barrier edge ordering exports
76  DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
77  // Also add cluster edge
78  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
79  }
80 }
81 
82 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
84 
85  for (const SDep &Pred : SU.Preds) {
86  SUnit *PredSU = Pred.getSUnit();
87  if (Pred.isBarrier() && isExport(*PredSU)) {
88  ToRemove.push_back(Pred);
89  if (isExport(SU))
90  continue;
91 
92  // If we remove a barrier we need to copy dependencies
93  // from the predecessor to maintain order.
94  for (const SDep &ExportPred : PredSU->Preds) {
95  SUnit *ExportPredSU = ExportPred.getSUnit();
96  if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
97  ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
98  }
99  }
100  }
101 
102  for (SDep Pred : ToRemove)
103  SU.removePred(Pred);
104  for (SDep Pred : ToAdd)
105  DAG->addEdge(&SU, Pred);
106 }
107 
109  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
110 
112 
113  // Pass through DAG gathering a list of exports and removing barrier edges
114  // creating dependencies on exports. Freeing exports of successor edges
115  // allows more scheduling freedom, and nothing should be order dependent
116  // on exports. Edges will be added later to order the exports.
117  unsigned PosCount = 0;
118  for (SUnit &SU : DAG->SUnits) {
119  if (!isExport(SU))
120  continue;
121 
122  Chain.push_back(&SU);
123  if (isPositionExport(TII, &SU))
124  PosCount++;
125 
126  removeExportDependencies(DAG, SU);
127 
128  SmallVector<SDep, 4> Succs(SU.Succs);
129  for (SDep Succ : Succs)
130  removeExportDependencies(DAG, *Succ.getSUnit());
131  }
132 
133  // Apply clustering if there are multiple exports
134  if (Chain.size() > 1) {
135  sortChain(TII, Chain, PosCount);
136  buildCluster(Chain, DAG);
137  }
138 }
139 
140 } // end namespace
141 
142 namespace llvm {
143 
144 std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
145  return std::make_unique<ExportClustering>();
146 }
147 
148 } // end namespace llvm
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
ScheduleDAGInstrs.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:546
llvm::SIInstrInfo::isEXP
static bool isEXP(const MachineInstr &MI)
Definition: SIInstrInfo.h:552
llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1200
llvm::AMDGPU::Exp::ET_POS0
@ ET_POS0
Definition: SIDefines.h:747
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1318
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
llvm::AMDGPU::Exp::ET_POS_LAST
@ ET_POS_LAST
Definition: SIDefines.h:750
AMDGPUMCTargetDesc.h
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
SIInstrInfo.h
AMDGPUExportClustering.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::SDep::isWeak
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::SDep::Cluster
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
llvm::ScheduleDAGMutation
Mutate the DAG as a postpass after normal DAG building.
Definition: ScheduleDAGMutation.h:22
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::SDep::isBarrier
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
Definition: ScheduleDAG.h:174