LLVM 17.0.0git
AMDGPUSubtarget.h
Go to the documentation of this file.
1//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// Base class for AMDGPU specific classes of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16
17#include "llvm/IR/CallingConv.h"
20
21namespace llvm {
22
24class Function;
25class Instruction;
26class MachineFunction;
27class TargetMachine;
28
30public:
33 R600 = 1,
34 R700 = 2,
40 GFX9 = 8,
41 GFX10 = 9,
42 GFX11 = 10
43 };
44
45private:
46 Triple TargetTriple;
47
48protected:
49 bool GCN3Encoding = false;
50 bool Has16BitInsts = false;
51 bool HasTrue16BitInsts = false;
52 bool HasMadMixInsts = false;
53 bool HasMadMacF32Insts = false;
54 bool HasDsSrc2Insts = false;
55 bool HasSDWA = false;
56 bool HasVOP3PInsts = false;
57 bool HasMulI24 = true;
58 bool HasMulU24 = true;
59 bool HasSMulHi = false;
60 bool HasInv2PiInlineImm = false;
61 bool HasFminFmaxLegacy = true;
62 bool EnablePromoteAlloca = false;
63 bool HasTrigReducedRange = false;
64 unsigned EUsPerCU = 4;
65 unsigned MaxWavesPerEU = 10;
66 unsigned LocalMemorySize = 0;
69
70public:
71 AMDGPUSubtarget(const Triple &TT);
72
73 static const AMDGPUSubtarget &get(const MachineFunction &MF);
74 static const AMDGPUSubtarget &get(const TargetMachine &TM,
75 const Function &F);
76
77 /// \returns Default flat work group size range for a calling convention.
78 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
79
80 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
81 /// for function \p F, or minimum/maximum flat work group sizes explicitly
82 /// requested using "amdgpu-flat-work-group-size" attribute attached to
83 /// function \p F.
84 ///
85 /// \returns Subtarget's default values if explicitly requested values cannot
86 /// be converted to integer, or violate subtarget's specifications.
87 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
88
89 /// \returns Subtarget's default pair of minimum/maximum number of waves per
90 /// execution unit for function \p F, or minimum/maximum number of waves per
91 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
92 /// attached to function \p F.
93 ///
94 /// \returns Subtarget's default values if explicitly requested values cannot
95 /// be converted to integer, violate subtarget's specifications, or are not
96 /// compatible with minimum/maximum number of waves limited by flat work group
97 /// size, register usage, and/or lds usage.
98 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
99 // Default/requested minimum/maximum flat work group sizes.
100 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
101 return getWavesPerEU(F, FlatWorkGroupSizes);
102 }
103
104 /// Overload which uses the specified values for the flat work group sizes,
105 /// rather than querying the function itself. \p FlatWorkGroupSizes should
106 /// correspond to the function's value for getFlatWorkGroupSizes.
107 std::pair<unsigned, unsigned>
108 getWavesPerEU(const Function &F,
109 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
110
111 /// Return the amount of LDS that can be used that will not restrict the
112 /// occupancy lower than WaveCount.
113 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114 const Function &) const;
115
116 /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
117 /// the given LDS memory size is the only constraint.
118 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119
120 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121
122 bool isAmdHsaOS() const {
123 return TargetTriple.getOS() == Triple::AMDHSA;
124 }
125
126 bool isAmdPalOS() const {
127 return TargetTriple.getOS() == Triple::AMDPAL;
128 }
129
130 bool isMesa3DOS() const {
131 return TargetTriple.getOS() == Triple::Mesa3D;
132 }
133
134 bool isMesaKernel(const Function &F) const;
135
136 bool isAmdHsaOrMesa(const Function &F) const {
137 return isAmdHsaOS() || isMesaKernel(F);
138 }
139
140 bool isGCN() const {
141 return TargetTriple.getArch() == Triple::amdgcn;
142 }
143
144 bool isGCN3Encoding() const {
145 return GCN3Encoding;
146 }
147
148 bool has16BitInsts() const {
149 return Has16BitInsts;
150 }
151
/// \returns the HasTrue16BitInsts flag (initialized to false in this class).
152 bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
153
154 bool hasMadMixInsts() const {
155 return HasMadMixInsts;
156 }
157
158 bool hasMadMacF32Insts() const {
159 return HasMadMacF32Insts || !isGCN();
160 }
161
162 bool hasDsSrc2Insts() const {
163 return HasDsSrc2Insts;
164 }
165
166 bool hasSDWA() const {
167 return HasSDWA;
168 }
169
170 bool hasVOP3PInsts() const {
171 return HasVOP3PInsts;
172 }
173
174 bool hasMulI24() const {
175 return HasMulI24;
176 }
177
178 bool hasMulU24() const {
179 return HasMulU24;
180 }
181
182 bool hasSMulHi() const {
183 return HasSMulHi;
184 }
185
186 bool hasInv2PiInlineImm() const {
187 return HasInv2PiInlineImm;
188 }
189
190 bool hasFminFmaxLegacy() const {
191 return HasFminFmaxLegacy;
192 }
193
194 bool hasTrigReducedRange() const {
195 return HasTrigReducedRange;
196 }
197
199 return EnablePromoteAlloca;
200 }
201
202 unsigned getWavefrontSize() const {
203 return 1 << WavefrontSizeLog2;
204 }
205
206 unsigned getWavefrontSizeLog2() const {
207 return WavefrontSizeLog2;
208 }
209
210 unsigned getLocalMemorySize() const {
211 return LocalMemorySize;
212 }
213
216 }
217
218 /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
219 /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
220 /// CU mode into account.
/// \returns the EUsPerCU member, which this class default-initializes to 4.
221 unsigned getEUsPerCU() const { return EUsPerCU; }
222
224 return isAmdHsaOS() ? Align(8) : Align(4);
225 }
226
227 /// Returns the offset in bytes from the start of the input buffer
228 /// of the first explicit kernel argument.
229 unsigned getExplicitKernelArgOffset(const Function &F) const {
230 switch (TargetTriple.getOS()) {
231 case Triple::AMDHSA:
232 case Triple::AMDPAL:
233 case Triple::Mesa3D:
234 return 0;
236 default:
237 // For legacy reasons unknown/other is treated as a different version of
238 // mesa.
239 return 36;
240 }
241
242 llvm_unreachable("invalid triple OS");
243 }
244
245 /// \returns Maximum number of work groups per compute unit supported by the
246 /// subtarget and limited by given \p FlatWorkGroupSize.
247 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
248
249 /// \returns Minimum flat work group size supported by the subtarget.
250 virtual unsigned getMinFlatWorkGroupSize() const = 0;
251
252 /// \returns Maximum flat work group size supported by the subtarget.
253 virtual unsigned getMaxFlatWorkGroupSize() const = 0;
254
255 /// \returns Number of waves per execution unit required to support the given
256 /// \p FlatWorkGroupSize.
257 virtual unsigned
258 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
259
260 /// \returns Minimum number of waves per execution unit supported by the
261 /// subtarget.
262 virtual unsigned getMinWavesPerEU() const = 0;
263
264 /// \returns Maximum number of waves per execution unit supported by the
265 /// subtarget without any kind of limitation.
/// This is the MaxWavesPerEU member, which this class default-initializes
/// to 10.
266 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
267
268 /// Return the maximum workitem ID value in the function, for the given (0, 1,
269 /// 2) dimension.
270 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
271
272 /// Creates value range metadata on a workitemid.* intrinsic call or load.
274
275 /// \returns Number of bytes of arguments that are passed to a shader or
276 /// kernel in addition to the explicit ones declared for the function.
277 unsigned getImplicitArgNumBytes(const Function &F) const;
278 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
279 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
280
281 /// \returns Corresponding DWARF register number mapping flavour for the
282 /// \p WavefrontSize.
284
285 virtual ~AMDGPUSubtarget() = default;
286};
287
288} // end namespace llvm
289
290#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
bool hasFminFmaxLegacy() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Align getAlignmentForImplicitArgPtr() const
bool hasMadMacF32Insts() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
virtual unsigned getMinWavesPerEU() const =0
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
unsigned getAddressableLocalMemorySize() const
bool isGCN3Encoding() const
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
bool hasMadMixInsts() const
unsigned getWavefrontSizeLog2() const
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool has16BitInsts() const
virtual ~AMDGPUSubtarget()=default
bool hasTrue16BitInsts() const
bool isAmdHsaOrMesa(const Function &F) const
bool isPromoteAllocaEnabled() const
bool hasTrigReducedRange() const
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
bool hasDsSrc2Insts() const
unsigned getMaxWavesPerEU() const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
unsigned AddressableLocalMemorySize
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasInv2PiInlineImm() const
bool hasVOP3PInsts() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:365
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:356
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39