LLVM 23.0.0git
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15#include "AMDGPUCallLowering.h"
17#include "AMDGPULegalizerInfo.h"
19#include "R600Subtarget.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/IR/IntrinsicsR600.h"
28#include "llvm/IR/MDBuilder.h"
29#include <algorithm>
30
31using namespace llvm;
32
33#define DEBUG_TYPE "amdgpu-subtarget"
34
35// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
36// allows the given function to achieve an occupancy of NWaves waves per
37// SIMD / EU, taking into account only the function's *maximum* workgroup size.
38unsigned
40 const Function &F) const {
41 const unsigned WaveSize = getWavefrontSize();
42 const unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
43 const unsigned WavesPerWorkgroup =
44 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
45
46 const unsigned WorkGroupsPerCU =
47 std::max(1u, (NWaves * getEUsPerCU()) / WavesPerWorkgroup);
48
49 return getLocalMemorySize() / WorkGroupsPerCU;
50}
51
53 uint32_t LDSBytes, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
54
55 // LDS granularity accounted for by aligning the queried LDS size to the
56 // allocation block size.
57 const unsigned Granularity = std::max(LDSAllocationGranularity, 1u);
58 LDSBytes = alignTo(LDSBytes, Granularity);
59 const unsigned MaxWGsLDS = getLocalMemorySize() / std::max(LDSBytes, 1u);
60
61 // Queried LDS size may be larger than available on a CU, in which case we
62 // consider the only achievable occupancy to be 1, in line with what we
63 // consider the occupancy to be when the number of requested registers in a
64 // particular bank is higher than the number of available ones in that bank.
65 if (!MaxWGsLDS)
66 return {1, 1};
67
68 const unsigned WaveSize = getWavefrontSize(), WavesPerEU = getMaxWavesPerEU();
69
70 auto PropsFromWGSize = [=](unsigned WGSize)
71 -> std::tuple<const unsigned, const unsigned, unsigned> {
72 unsigned WavesPerWG = divideCeil(WGSize, WaveSize);
73 unsigned WGsPerCU = std::min(getMaxWorkGroupsPerCU(WGSize), MaxWGsLDS);
74 return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
75 };
76
77 // The maximum group size will generally yield the minimum number of
78 // workgroups, maximum number of waves, and minimum occupancy. The opposite is
79 // generally true for the minimum group size. LDS or barrier ressource
80 // limitations can flip those minimums/maximums.
81 const auto [MinWGSize, MaxWGSize] = FlatWorkGroupSizes;
82 auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
83 auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
84
85 // It is possible that we end up with flipped minimum and maximum number of
86 // waves per CU when the number of minimum/maximum concurrent groups on the CU
87 // is limited by LDS usage or barrier resources.
88 if (MinWavesPerCU >= MaxWavesPerCU) {
89 std::swap(MinWavesPerCU, MaxWavesPerCU);
90 } else {
91 const unsigned WaveSlotsPerCU = WavesPerEU * getEUsPerCU();
92
93 // Look for a potential smaller group size than the maximum which decreases
94 // the concurrent number of waves on the CU for the same number of
95 // concurrent workgroups on the CU.
96 unsigned MinWavesPerCUForWGSize =
97 divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
98 if (MinWavesPerCU > MinWavesPerCUForWGSize) {
99 unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
100 if (unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
101 // There may exist a smaller group size than the maximum that achieves
102 // the minimum number of waves per CU. This group size is the largest
103 // possible size that requires MaxWavesPerWG - E waves where E is
104 // maximized under the following constraints.
105 // 1. 0 <= E <= ExcessSlotsPerWG
106 // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
107 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
108 MaxWavesPerWG - MinWavesPerWG);
109 }
110 }
111
112 // Look for a potential larger group size than the minimum which increases
113 // the concurrent number of waves on the CU for the same number of
114 // concurrent workgroups on the CU.
115 unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
116 if (unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
117 // There may exist a larger group size than the minimum that achieves the
118 // maximum number of waves per CU. This group size is the smallest
119 // possible size that requires MinWavesPerWG + L waves where L is
120 // maximized under the following constraints.
121 // 1. 0 <= L <= LeftoverSlotsPerWG
122 // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
123 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
124 ((MaxWGSize - 1) / WaveSize) + 1 -
125 MinWavesPerWG);
126 }
127 }
128
129 // Return the minimum/maximum number of waves on any EU, assuming that all
130 // wavefronts are spread across all EUs as evenly as possible.
131 return {std::clamp(MinWavesPerCU / getEUsPerCU(), 1U, WavesPerEU),
132 std::clamp(divideCeil(MaxWavesPerCU, getEUsPerCU()), 1U, WavesPerEU)};
133}
134
136 const MachineFunction &MF) const {
137 const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
138 return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
139}
140
141std::pair<unsigned, unsigned>
143 switch (CC) {
150 return std::pair(1, getWavefrontSize());
151 default:
152 return std::pair(1u, getMaxFlatWorkGroupSize());
153 }
154}
155
156std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
157 const Function &F) const {
158 // Default minimum/maximum flat work group sizes.
159 std::pair<unsigned, unsigned> Default =
160 getDefaultFlatWorkGroupSize(F.getCallingConv());
161
162 // Requested minimum/maximum flat work group sizes.
163 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
164 F, "amdgpu-flat-work-group-size", Default);
165
166 // Make sure requested minimum is less than requested maximum.
167 if (Requested.first > Requested.second)
168 return Default;
169
170 // Make sure requested values do not violate subtarget's specifications.
171 if (Requested.first < getMinFlatWorkGroupSize())
172 return Default;
173 if (Requested.second > getMaxFlatWorkGroupSize())
174 return Default;
175
176 return Requested;
177}
178
182
183std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
184 std::pair<unsigned, unsigned> RequestedWavesPerEU,
185 std::pair<unsigned, unsigned> FlatWorkGroupSizes, unsigned LDSBytes) const {
186 // Default minimum/maximum number of waves per EU. The range of flat workgroup
187 // sizes limits the achievable maximum, and we aim to support enough waves per
188 // EU so that we can concurrently execute all waves of a single workgroup of
189 // maximum size on a CU.
190 std::pair<unsigned, unsigned> Default = {
191 getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second),
192 getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
193 Default.first = std::min(Default.first, Default.second);
194
195 // Make sure requested minimum is within the default range and lower than the
196 // requested maximum. The latter must not violate target specification.
197 if (RequestedWavesPerEU.first < Default.first ||
198 RequestedWavesPerEU.first > Default.second ||
199 RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
200 RequestedWavesPerEU.second > getMaxWavesPerEU())
201 return Default;
202
203 // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
204 RequestedWavesPerEU.second =
205 std::min(RequestedWavesPerEU.second, Default.second);
206 return RequestedWavesPerEU;
207}
208
209std::pair<unsigned, unsigned>
211 // Default/requested minimum/maximum flat work group sizes.
212 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
213 // Minimum number of bytes allocated in the LDS.
214 unsigned LDSBytes =
215 AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size", {0, UINT32_MAX},
216 /*OnlyFirstRequired=*/true)
217 .first;
218 return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
219}
220
221std::pair<unsigned, unsigned>
222AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
223 unsigned LDSBytes, const Function &F) const {
224 // Default minimum/maximum number of waves per execution unit.
225 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
226
227 // Requested minimum/maximum number of waves per execution unit.
228 std::pair<unsigned, unsigned> Requested =
229 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
230 return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes, LDSBytes);
231}
232
233std::optional<unsigned>
235 unsigned Dim) const {
236 auto *Node = Kernel.getMetadata("reqd_work_group_size");
237 if (Node && Node->getNumOperands() == 3)
238 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
239 return std::nullopt;
240}
241
243 const Function &F, bool RequiresUniformYZ) const {
244 auto *Node = F.getMetadata("reqd_work_group_size");
245 if (!Node || Node->getNumOperands() != 3)
246 return false;
247 unsigned XLen =
248 mdconst::extract<ConstantInt>(Node->getOperand(0))->getZExtValue();
249 unsigned YLen =
250 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue();
251 unsigned ZLen =
252 mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
253
254 bool Is1D = YLen <= 1 && ZLen <= 1;
255 bool IsXLargeEnough =
256 isPowerOf2_32(XLen) && (!RequiresUniformYZ || XLen >= getWavefrontSize());
257 return Is1D || IsXLargeEnough;
258}
259
261 return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
262}
263
265 unsigned Dimension) const {
266 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
267 if (ReqdSize)
268 return *ReqdSize - 1;
269 return getFlatWorkGroupSizes(Kernel).second - 1;
270}
271
273 for (int I = 0; I < 3; ++I) {
274 if (getMaxWorkitemID(Func, I) > 0)
275 return false;
276 }
277
278 // If the function may call the WWM intrinsic, just return false as
279 // all threads will be active at some point
280 if (!Func.hasFnAttribute("amdgpu-no-wwm"))
281 return false;
282
283 return true;
284}
285
287 Function *Kernel = I->getFunction();
288 unsigned MinSize = 0;
289 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
290 bool IdQuery = false;
291
292 // If reqd_work_group_size is present it narrows value down.
293 if (auto *CI = dyn_cast<CallInst>(I)) {
294 const Function *F = CI->getCalledFunction();
295 if (F) {
296 unsigned Dim = UINT_MAX;
297 switch (F->getIntrinsicID()) {
298 case Intrinsic::amdgcn_workitem_id_x:
299 case Intrinsic::r600_read_tidig_x:
300 IdQuery = true;
301 [[fallthrough]];
302 case Intrinsic::r600_read_local_size_x:
303 Dim = 0;
304 break;
305 case Intrinsic::amdgcn_workitem_id_y:
306 case Intrinsic::r600_read_tidig_y:
307 IdQuery = true;
308 [[fallthrough]];
309 case Intrinsic::r600_read_local_size_y:
310 Dim = 1;
311 break;
312 case Intrinsic::amdgcn_workitem_id_z:
313 case Intrinsic::r600_read_tidig_z:
314 IdQuery = true;
315 [[fallthrough]];
316 case Intrinsic::r600_read_local_size_z:
317 Dim = 2;
318 break;
319 default:
320 break;
321 }
322
323 if (Dim <= 3) {
324 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
325 if (ReqdSize)
326 MinSize = MaxSize = *ReqdSize;
327 }
328 }
329 }
330
331 if (!MaxSize)
332 return false;
333
334 // Range metadata is [Lo, Hi). For ID query we need to pass max size
335 // as Hi. For size query we need to pass Hi + 1.
336 if (IdQuery)
337 MinSize = 0;
338 else
339 ++MaxSize;
340
341 APInt Lower{32, MinSize};
342 APInt Upper{32, MaxSize};
343 if (auto *CI = dyn_cast<CallBase>(I)) {
345 CI->addRangeRetAttr(Range);
346 } else {
347 MDBuilder MDB(I->getContext());
348 MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
349 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
350 }
351 return true;
352}
353
355
356 // We don't allocate the segment if we know the implicit arguments weren't
357 // used, even if the ABI implies we need them.
358 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
359 return 0;
360
361 if (isMesaKernel(F))
362 return 16;
363
364 // Assume all implicit inputs are used by default
365 const Module *M = F.getParent();
366 unsigned NBytes =
368 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
369 NBytes);
370}
371
373 Align &MaxAlign) const {
374 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
375 F.getCallingConv() == CallingConv::SPIR_KERNEL);
376
377 const DataLayout &DL = F.getDataLayout();
378 uint64_t ExplicitArgBytes = 0;
379 MaxAlign = Align(1);
380
381 for (const Argument &Arg : F.args()) {
382 if (Arg.hasAttribute("amdgpu-hidden-argument"))
383 continue;
384
385 const bool IsByRef = Arg.hasByRefAttr();
386 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
387 Align Alignment = DL.getValueOrABITypeAlignment(
388 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
389 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
390 ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
391 MaxAlign = std::max(MaxAlign, Alignment);
392 }
393
394 return ExplicitArgBytes;
395}
396
398 Align &MaxAlign) const {
399 if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
400 F.getCallingConv() != CallingConv::SPIR_KERNEL)
401 return 0;
402
403 uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
404
405 unsigned ExplicitOffset = getExplicitKernelArgOffset();
406
407 uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
408 unsigned ImplicitBytes = getImplicitArgNumBytes(F);
409 if (ImplicitBytes != 0) {
410 const Align Alignment = getAlignmentForImplicitArgPtr();
411 TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
412 MaxAlign = std::max(MaxAlign, Alignment);
413 }
414
415 // Being able to dereference past the end is useful for emitting scalar loads.
416 return alignTo(TotalSize, 4);
417}
418
423
426 return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
427 return static_cast<const AMDGPUSubtarget &>(MF.getSubtarget<R600Subtarget>());
428}
429
431 if (TM.getTargetTriple().isAMDGCN())
432 return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
433 return static_cast<const AMDGPUSubtarget &>(
435}
436
437// FIXME: This has no reason to be in subtarget
440 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
441 std::numeric_limits<uint32_t>::max());
442}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file describes how to lower LLVM inline asm to machine code INLINEASM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
AMDGPU R600 specific subclass of TargetSubtarget.
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
AMDGPUSubtarget(const Triple &TT)
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU() const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
bool isSingleWavefrontWorkgroup(const Function &F) const
Class for arbitrary precision integers.
Definition APInt.h:78
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1069
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:897
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39