LLVM 20.0.0git
AMDGPUSetWavePriority.cpp
Go to the documentation of this file.
1//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Pass to temporarily raise the wave priority beginning at the start of
11/// the shader function until its last VMEM instructions to allow younger
12/// waves to issue their VMEM instructions as well.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
22
23using namespace llvm;
24
25#define DEBUG_TYPE "amdgpu-set-wave-priority"
26
28 "amdgpu-set-wave-priority-valu-insts-threshold",
29 cl::desc("VALU instruction count threshold for adjusting wave priority"),
30 cl::init(100), cl::Hidden);
31
32namespace {
33
34struct MBBInfo {
35 MBBInfo() = default;
36 unsigned NumVALUInstsAtStart = 0;
37 bool MayReachVMEMLoad = false;
38 MachineInstr *LastVMEMLoad = nullptr;
39};
40
42
43class AMDGPUSetWavePriority : public MachineFunctionPass {
44public:
45 static char ID;
46
47 AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
48
49 StringRef getPassName() const override { return "Set wave priority"; }
50
51 bool runOnMachineFunction(MachineFunction &MF) override;
52
53private:
54 MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
56 unsigned priority) const;
57
58 const SIInstrInfo *TII;
59};
60
61} // End anonymous namespace.
62
63INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
64 false)
65
66char AMDGPUSetWavePriority::ID = 0;
67
69 return new AMDGPUSetWavePriority();
70}
71
73AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
75 unsigned priority) const {
76 return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
77 .addImm(priority);
78}
79
80// Checks that for every predecessor Pred that can reach a VMEM load,
81// none of Pred's successors can reach a VMEM load.
83 MBBInfoSet &MBBInfos) {
84 for (const MachineBasicBlock *Pred : MBB.predecessors()) {
85 if (!MBBInfos[Pred].MayReachVMEMLoad)
86 continue;
87 for (const MachineBasicBlock *Succ : Pred->successors()) {
88 if (MBBInfos[Succ].MayReachVMEMLoad)
89 return false;
90 }
91 }
92 return true;
93}
94
95static bool isVMEMLoad(const MachineInstr &MI) {
96 return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
97}
98
99bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
100 const unsigned HighPriority = 3;
101 const unsigned LowPriority = 0;
102
103 Function &F = MF.getFunction();
104 if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
105 return false;
106
108 TII = ST.getInstrInfo();
109
110 unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
111 Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
112 if (A.isValid())
113 A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
114
115 // Find VMEM loads that may be executed before long-enough sequences of
116 // VALU instructions. We currently assume that backedges/loops, branch
117 // probabilities and other details can be ignored, so we essentially
118 // determine the largest number of VALU instructions along every
119 // possible path from the start of the function that may potentially be
120 // executed provided no backedge is ever taken.
121 MBBInfoSet MBBInfos;
122 for (MachineBasicBlock *MBB : post_order(&MF)) {
123 bool AtStart = true;
124 unsigned MaxNumVALUInstsInMiddle = 0;
125 unsigned NumVALUInstsAtEnd = 0;
126 for (MachineInstr &MI : *MBB) {
127 if (isVMEMLoad(MI)) {
128 AtStart = false;
129 MBBInfo &Info = MBBInfos[MBB];
130 Info.NumVALUInstsAtStart = 0;
131 MaxNumVALUInstsInMiddle = 0;
132 NumVALUInstsAtEnd = 0;
133 Info.LastVMEMLoad = &MI;
134 } else if (SIInstrInfo::isDS(MI)) {
135 AtStart = false;
136 MaxNumVALUInstsInMiddle =
137 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
138 NumVALUInstsAtEnd = 0;
139 } else if (SIInstrInfo::isVALU(MI)) {
140 if (AtStart)
141 ++MBBInfos[MBB].NumVALUInstsAtStart;
142 ++NumVALUInstsAtEnd;
143 }
144 }
145
146 bool SuccsMayReachVMEMLoad = false;
147 unsigned NumFollowingVALUInsts = 0;
148 for (const MachineBasicBlock *Succ : MBB->successors()) {
149 SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
150 NumFollowingVALUInsts =
151 std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
152 }
153 MBBInfo &Info = MBBInfos[MBB];
154 if (AtStart)
155 Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
156 NumVALUInstsAtEnd += NumFollowingVALUInsts;
157
158 unsigned MaxNumVALUInsts =
159 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
160 Info.MayReachVMEMLoad =
161 SuccsMayReachVMEMLoad ||
162 (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
163 }
164
166 if (!MBBInfos[&Entry].MayReachVMEMLoad)
167 return false;
168
169 // Raise the priority at the beginning of the shader.
170 MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
171 while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
172 ++I;
173 BuildSetprioMI(Entry, I, HighPriority);
174
175 // Lower the priority on edges where control leaves blocks from which
176 // the VMEM loads are reachable.
177 SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
178 for (MachineBasicBlock &MBB : MF) {
179 if (MBBInfos[&MBB].MayReachVMEMLoad) {
180 if (MBB.succ_empty())
181 PriorityLoweringBlocks.insert(&MBB);
182 continue;
183 }
184
186 for (MachineBasicBlock *Pred : MBB.predecessors()) {
187 if (MBBInfos[Pred].MayReachVMEMLoad)
188 PriorityLoweringBlocks.insert(Pred);
189 }
190 continue;
191 }
192
193 // Where lowering the priority in predecessors is not possible, the
194 // block receiving control either was not part of a loop in the first
195 // place or the loop simplification/canonicalization pass should have
196 // already tried to split the edge and insert a preheader, and if for
197 // whatever reason it failed to do so, then this leaves us with the
198 // only option of lowering the priority within the loop.
199 PriorityLoweringBlocks.insert(&MBB);
200 }
201
202 for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
203 BuildSetprioMI(
204 *MBB,
205 MBBInfos[MBB].LastVMEMLoad
206 ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
207 : MBB->begin(),
208 LowPriority);
209 }
210
211 return true;
212}
Provides AMDGPU specific target descriptions.
static cl::opt< unsigned > DefaultVALUInstsThreshold("amdgpu-set-wave-priority-valu-insts-threshold", cl::desc("VALU instruction count threshold for adjusting wave priority"), cl::init(100), cl::Hidden)
static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, MBBInfoSet &MBBInfos)
static bool isVMEMLoad(const MachineInstr &MI)
#define DEBUG_TYPE
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Interface definition for SIInstrInfo.
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
static bool isDS(const MachineInstr &MI)
Definition: SIInstrInfo.h:560
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:438
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:422
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool isEntryFunctionCC(CallingConv::ID CC)
@ Entry
Definition: COFF.h:844
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUSetWavePriorityPass()
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< po_iterator< T > > post_order(const T &G)