LLVM 23.0.0git
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15#include "AMDGPUCallLowering.h"
17#include "AMDGPULegalizerInfo.h"
19#include "R600Subtarget.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/IR/IntrinsicsR600.h"
28#include "llvm/IR/MDBuilder.h"
29#include <algorithm>
30
31using namespace llvm;
32
33#define DEBUG_TYPE "amdgpu-subtarget"
34
35// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
36// allows the given function to achieve an occupancy of NWaves waves per
37// SIMD / EU, taking into account only the function's *maximum* workgroup size.
38unsigned
40 const Function &F) const {
41 const unsigned WaveSize = getWavefrontSize();
42 const unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
43 const unsigned WavesPerWorkgroup =
44 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
45
46 const unsigned WorkGroupsPerCU =
47 std::max(1u, (NWaves * getEUsPerCU()) / WavesPerWorkgroup);
48
49 return getLocalMemorySize() / WorkGroupsPerCU;
50}
51
53 uint32_t LDSBytes, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
54
55 // FIXME: We should take into account the LDS allocation granularity.
56 const unsigned MaxWGsLDS = getLocalMemorySize() / std::max(LDSBytes, 1u);
57
58 // Queried LDS size may be larger than available on a CU, in which case we
59 // consider the only achievable occupancy to be 1, in line with what we
60 // consider the occupancy to be when the number of requested registers in a
61 // particular bank is higher than the number of available ones in that bank.
62 if (!MaxWGsLDS)
63 return {1, 1};
64
65 const unsigned WaveSize = getWavefrontSize(), WavesPerEU = getMaxWavesPerEU();
66
67 auto PropsFromWGSize = [=](unsigned WGSize)
68 -> std::tuple<const unsigned, const unsigned, unsigned> {
69 unsigned WavesPerWG = divideCeil(WGSize, WaveSize);
70 unsigned WGsPerCU = std::min(getMaxWorkGroupsPerCU(WGSize), MaxWGsLDS);
71 return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
72 };
73
74 // The maximum group size will generally yield the minimum number of
75 // workgroups, maximum number of waves, and minimum occupancy. The opposite is
76 // generally true for the minimum group size. LDS or barrier ressource
77 // limitations can flip those minimums/maximums.
78 const auto [MinWGSize, MaxWGSize] = FlatWorkGroupSizes;
79 auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
80 auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
81
82 // It is possible that we end up with flipped minimum and maximum number of
83 // waves per CU when the number of minimum/maximum concurrent groups on the CU
84 // is limited by LDS usage or barrier resources.
85 if (MinWavesPerCU >= MaxWavesPerCU) {
86 std::swap(MinWavesPerCU, MaxWavesPerCU);
87 } else {
88 const unsigned WaveSlotsPerCU = WavesPerEU * getEUsPerCU();
89
90 // Look for a potential smaller group size than the maximum which decreases
91 // the concurrent number of waves on the CU for the same number of
92 // concurrent workgroups on the CU.
93 unsigned MinWavesPerCUForWGSize =
94 divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
95 if (MinWavesPerCU > MinWavesPerCUForWGSize) {
96 unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
97 if (unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
98 // There may exist a smaller group size than the maximum that achieves
99 // the minimum number of waves per CU. This group size is the largest
100 // possible size that requires MaxWavesPerWG - E waves where E is
101 // maximized under the following constraints.
102 // 1. 0 <= E <= ExcessSlotsPerWG
103 // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
104 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
105 MaxWavesPerWG - MinWavesPerWG);
106 }
107 }
108
109 // Look for a potential larger group size than the minimum which increases
110 // the concurrent number of waves on the CU for the same number of
111 // concurrent workgroups on the CU.
112 unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
113 if (unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
114 // There may exist a larger group size than the minimum that achieves the
115 // maximum number of waves per CU. This group size is the smallest
116 // possible size that requires MinWavesPerWG + L waves where L is
117 // maximized under the following constraints.
118 // 1. 0 <= L <= LeftoverSlotsPerWG
119 // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
120 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
121 ((MaxWGSize - 1) / WaveSize) + 1 -
122 MinWavesPerWG);
123 }
124 }
125
126 // Return the minimum/maximum number of waves on any EU, assuming that all
127 // wavefronts are spread across all EUs as evenly as possible.
128 return {std::clamp(MinWavesPerCU / getEUsPerCU(), 1U, WavesPerEU),
129 std::clamp(divideCeil(MaxWavesPerCU, getEUsPerCU()), 1U, WavesPerEU)};
130}
131
133 const MachineFunction &MF) const {
134 const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
135 return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
136}
137
138std::pair<unsigned, unsigned>
140 switch (CC) {
147 return std::pair(1, getWavefrontSize());
148 default:
149 return std::pair(1u, getMaxFlatWorkGroupSize());
150 }
151}
152
153std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
154 const Function &F) const {
155 // Default minimum/maximum flat work group sizes.
156 std::pair<unsigned, unsigned> Default =
157 getDefaultFlatWorkGroupSize(F.getCallingConv());
158
159 // Requested minimum/maximum flat work group sizes.
160 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
161 F, "amdgpu-flat-work-group-size", Default);
162
163 // Make sure requested minimum is less than requested maximum.
164 if (Requested.first > Requested.second)
165 return Default;
166
167 // Make sure requested values do not violate subtarget's specifications.
168 if (Requested.first < getMinFlatWorkGroupSize())
169 return Default;
170 if (Requested.second > getMaxFlatWorkGroupSize())
171 return Default;
172
173 return Requested;
174}
175
179
180std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
181 std::pair<unsigned, unsigned> RequestedWavesPerEU,
182 std::pair<unsigned, unsigned> FlatWorkGroupSizes, unsigned LDSBytes) const {
183 // Default minimum/maximum number of waves per EU. The range of flat workgroup
184 // sizes limits the achievable maximum, and we aim to support enough waves per
185 // EU so that we can concurrently execute all waves of a single workgroup of
186 // maximum size on a CU.
187 std::pair<unsigned, unsigned> Default = {
188 getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second),
189 getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
190 Default.first = std::min(Default.first, Default.second);
191
192 // Make sure requested minimum is within the default range and lower than the
193 // requested maximum. The latter must not violate target specification.
194 if (RequestedWavesPerEU.first < Default.first ||
195 RequestedWavesPerEU.first > Default.second ||
196 RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
197 RequestedWavesPerEU.second > getMaxWavesPerEU())
198 return Default;
199
200 // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
201 RequestedWavesPerEU.second =
202 std::min(RequestedWavesPerEU.second, Default.second);
203 return RequestedWavesPerEU;
204}
205
206std::pair<unsigned, unsigned>
208 // Default/requested minimum/maximum flat work group sizes.
209 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
210 // Minimum number of bytes allocated in the LDS.
211 unsigned LDSBytes =
212 AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size", {0, UINT32_MAX},
213 /*OnlyFirstRequired=*/true)
214 .first;
215 return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
216}
217
218std::pair<unsigned, unsigned>
219AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
220 unsigned LDSBytes, const Function &F) const {
221 // Default minimum/maximum number of waves per execution unit.
222 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
223
224 // Requested minimum/maximum number of waves per execution unit.
225 std::pair<unsigned, unsigned> Requested =
226 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
227 return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes, LDSBytes);
228}
229
230std::optional<unsigned>
232 unsigned Dim) const {
233 auto *Node = Kernel.getMetadata("reqd_work_group_size");
234 if (Node && Node->getNumOperands() == 3)
235 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
236 return std::nullopt;
237}
238
240 const Function &F, bool RequiresUniformYZ) const {
241 auto *Node = F.getMetadata("reqd_work_group_size");
242 if (!Node || Node->getNumOperands() != 3)
243 return false;
244 unsigned XLen =
245 mdconst::extract<ConstantInt>(Node->getOperand(0))->getZExtValue();
246 unsigned YLen =
247 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue();
248 unsigned ZLen =
249 mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
250
251 bool Is1D = YLen <= 1 && ZLen <= 1;
252 bool IsXLargeEnough =
253 isPowerOf2_32(XLen) && (!RequiresUniformYZ || XLen >= getWavefrontSize());
254 return Is1D || IsXLargeEnough;
255}
256
258 return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
259}
260
262 unsigned Dimension) const {
263 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
264 if (ReqdSize)
265 return *ReqdSize - 1;
266 return getFlatWorkGroupSizes(Kernel).second - 1;
267}
268
270 for (int I = 0; I < 3; ++I) {
271 if (getMaxWorkitemID(Func, I) > 0)
272 return false;
273 }
274
275 // If the function may call the WWM intrinsic, just return false as
276 // all threads will be active at some point
277 if (!Func.hasFnAttribute("amdgpu-no-wwm"))
278 return false;
279
280 return true;
281}
282
284 Function *Kernel = I->getFunction();
285 unsigned MinSize = 0;
286 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
287 bool IdQuery = false;
288
289 // If reqd_work_group_size is present it narrows value down.
290 if (auto *CI = dyn_cast<CallInst>(I)) {
291 const Function *F = CI->getCalledFunction();
292 if (F) {
293 unsigned Dim = UINT_MAX;
294 switch (F->getIntrinsicID()) {
295 case Intrinsic::amdgcn_workitem_id_x:
296 case Intrinsic::r600_read_tidig_x:
297 IdQuery = true;
298 [[fallthrough]];
299 case Intrinsic::r600_read_local_size_x:
300 Dim = 0;
301 break;
302 case Intrinsic::amdgcn_workitem_id_y:
303 case Intrinsic::r600_read_tidig_y:
304 IdQuery = true;
305 [[fallthrough]];
306 case Intrinsic::r600_read_local_size_y:
307 Dim = 1;
308 break;
309 case Intrinsic::amdgcn_workitem_id_z:
310 case Intrinsic::r600_read_tidig_z:
311 IdQuery = true;
312 [[fallthrough]];
313 case Intrinsic::r600_read_local_size_z:
314 Dim = 2;
315 break;
316 default:
317 break;
318 }
319
320 if (Dim <= 3) {
321 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
322 if (ReqdSize)
323 MinSize = MaxSize = *ReqdSize;
324 }
325 }
326 }
327
328 if (!MaxSize)
329 return false;
330
331 // Range metadata is [Lo, Hi). For ID query we need to pass max size
332 // as Hi. For size query we need to pass Hi + 1.
333 if (IdQuery)
334 MinSize = 0;
335 else
336 ++MaxSize;
337
338 APInt Lower{32, MinSize};
339 APInt Upper{32, MaxSize};
340 if (auto *CI = dyn_cast<CallBase>(I)) {
342 CI->addRangeRetAttr(Range);
343 } else {
344 MDBuilder MDB(I->getContext());
345 MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
346 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
347 }
348 return true;
349}
350
352
353 // We don't allocate the segment if we know the implicit arguments weren't
354 // used, even if the ABI implies we need them.
355 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
356 return 0;
357
358 if (isMesaKernel(F))
359 return 16;
360
361 // Assume all implicit inputs are used by default
362 const Module *M = F.getParent();
363 unsigned NBytes =
365 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
366 NBytes);
367}
368
370 Align &MaxAlign) const {
371 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
372 F.getCallingConv() == CallingConv::SPIR_KERNEL);
373
374 const DataLayout &DL = F.getDataLayout();
375 uint64_t ExplicitArgBytes = 0;
376 MaxAlign = Align(1);
377
378 for (const Argument &Arg : F.args()) {
379 if (Arg.hasAttribute("amdgpu-hidden-argument"))
380 continue;
381
382 const bool IsByRef = Arg.hasByRefAttr();
383 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
384 Align Alignment = DL.getValueOrABITypeAlignment(
385 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
386 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
387 ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
388 MaxAlign = std::max(MaxAlign, Alignment);
389 }
390
391 return ExplicitArgBytes;
392}
393
395 Align &MaxAlign) const {
396 if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
397 F.getCallingConv() != CallingConv::SPIR_KERNEL)
398 return 0;
399
400 uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
401
402 unsigned ExplicitOffset = getExplicitKernelArgOffset();
403
404 uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
405 unsigned ImplicitBytes = getImplicitArgNumBytes(F);
406 if (ImplicitBytes != 0) {
407 const Align Alignment = getAlignmentForImplicitArgPtr();
408 TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
409 MaxAlign = std::max(MaxAlign, Alignment);
410 }
411
412 // Being able to dereference past the end is useful for emitting scalar loads.
413 return alignTo(TotalSize, 4);
414}
415
420
423 return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
424 return static_cast<const AMDGPUSubtarget &>(MF.getSubtarget<R600Subtarget>());
425}
426
428 if (TM.getTargetTriple().isAMDGCN())
429 return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
430 return static_cast<const AMDGPUSubtarget &>(
432}
433
434// FIXME: This has no reason to be in subtarget
437 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
438 std::numeric_limits<uint32_t>::max());
439}
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file describes how to lower LLVM inline asm to machine code INLINEASM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
AMDGPU R600 specific subclass of TargetSubtarget.
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
AMDGPUSubtarget(const Triple &TT)
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU() const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool RequiresUniformYZ=false) const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
bool isSingleWavefrontWorkgroup(const Function &F) const
Class for arbitrary precision integers.
Definition APInt.h:78
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:954
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39