27#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define Check(C, ...) \
35 VS.CheckFailed(__VA_ARGS__); \
43 if (
ID->getString() !=
"amdgpu.buffer.oob.mode" &&
44 ID->getString() !=
"amdgpu.tbuffer.oob.mode")
48 "'" +
ID->getString() +
"' module flag must use 'max' merge behaviour");
52 "' module flag must have a constant integer value");
54 "'" +
ID->getString() +
"' module flag must be 0, 1, or 2");
63 if (!VS.TT.isAMDGPU())
66 MDNode *ReqdWorkGroupSize =
F.getMetadata(
"reqd_work_group_size");
67 if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3)
71 for (
const MDOperand &
Op : ReqdWorkGroupSize->operands()) {
73 if (!
C ||
C->getValue().getActiveBits() > 64)
76 if (Dim != 0 && Product > std::numeric_limits<uint64_t>::max() / Dim)
81 Attribute FlatWorkGroupSize =
F.getFnAttribute(
"amdgpu-flat-work-group-size");
82 if (!FlatWorkGroupSize.
isValid()) {
83 VS.CheckFailed(
"reqd_work_group_size requires amdgpu-flat-work-group-size",
84 &
F, ReqdWorkGroupSize);
89 VS.CheckFailed(
"amdgpu-flat-work-group-size must be a string attribute",
95 std::pair<StringRef, StringRef> Values = AttrValue.
split(
',');
98 bool Parsed = !Values.second.contains(
',') &&
102 VS.CheckFailed(
"amdgpu-flat-work-group-size must be a pair of unsigned "
108 if (Min != Product || Max != Product) {
109 VS.CheckFailed(
"amdgpu-flat-work-group-size must equal the product of "
110 "reqd_work_group_size operands",
111 &
F, ReqdWorkGroupSize);
122 if (!VS.TT.isAMDGPU())
126 VS.CheckFailed(
"alloca on amdgpu must be in addrspace(5)", &AI);
133 case Intrinsic::amdgcn_kill:
143 case Intrinsic::amdgcn_kill: {
145 Check(CBI->getNumIndirectDests() == 1,
146 "callbr amdgcn_kill only supports one indirect dest");
151 "callbr amdgcn_kill indirect dest needs to be unreachable");
155 case Intrinsic::amdgcn_cs_chain: {
168 VS.CheckFailed(
"Intrinsic cannot be called from functions with this "
169 "calling convention",
174 Check(
Call.paramHasAttr(2, Attribute::InReg),
175 "SGPR arguments must have the `inreg` attribute", &
Call);
176 Check(!
Call.paramHasAttr(3, Attribute::InReg),
177 "VGPR arguments must not have the `inreg` attribute", &
Call);
181 "flags must be 0 or 1 for llvm.amdgcn.cs.chain", &
Call);
186 Intrinsic::amdgcn_unreachable;
188 "llvm.amdgcn.cs.chain must be followed by unreachable", &
Call);
191 case Intrinsic::amdgcn_init_exec_from_input: {
194 "only inreg arguments to the parent function are valid as inputs to "
199 case Intrinsic::amdgcn_set_inactive_chain_arg: {
206 VS.CheckFailed(
"Intrinsic can only be used from functions with the "
207 "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
208 "calling conventions",
213 unsigned InactiveIdx = 1;
214 Check(!
Call.paramHasAttr(InactiveIdx, Attribute::InReg),
215 "Value for inactive lanes must not have the `inreg` attribute",
218 "Value for inactive lanes must be a function argument", &
Call);
220 "Value for inactive lanes must be a VGPR function argument", &
Call);
223 case Intrinsic::amdgcn_call_whole_wave: {
225 Check(
F,
"Indirect whole wave calls are not allowed", &
Call);
229 "Callee must have the amdgpu_gfx_whole_wave calling convention",
232 Check(!
F->isVarArg(),
"Variadic whole wave calls are not allowed", &
Call);
235 "Call argument count must match callee argument count", &
Call);
237 Check(
F->arg_begin()->getType()->isIntegerTy(1),
238 "Callee must have i1 as its first argument", &
Call);
239 for (
auto [CallArg, FuncArg] :
241 Check(CallArg->getType() == FuncArg.getType(),
242 "Argument types must match", &
Call);
244 Check(
Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
245 FuncArg.hasInRegAttr(),
246 "Argument inreg attributes must match", &
Call);
250 case Intrinsic::amdgcn_s_prefetch_data: {
253 Call.getArgOperand(0)->getType()->getPointerAddressSpace()),
254 "llvm.amdgcn.s.prefetch.data only supports global or constant memory");
257 case Intrinsic::amdgcn_load_to_lds:
258 case Intrinsic::amdgcn_load_async_to_lds:
259 case Intrinsic::amdgcn_global_load_lds:
260 case Intrinsic::amdgcn_global_load_async_lds:
261 case Intrinsic::amdgcn_raw_buffer_load_lds:
262 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
263 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
264 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
265 case Intrinsic::amdgcn_struct_buffer_load_lds:
266 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
267 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
268 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
271 "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, "
276 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
277 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
283 Check(CBSZ <= 4,
"invalid value for cbsz format",
Call,
284 Call.getArgOperand(3));
285 Check(BLGP <= 4,
"invalid value for blgp format",
Call,
286 Call.getArgOperand(4));
288 auto GetFormatNumRegs = [](
unsigned FormatVal) {
304 if (!Ty || !Ty->getElementType()->isIntegerTy(32))
306 unsigned NumElts = Ty->getNumElements();
307 return NumElts == 4 || NumElts == 6 || NumElts == 8;
312 Check(IsValidSrcASrcBVector(Src0Ty),
313 "operand 0 must be 4, 6 or 8 element i32 vector", &
Call, Src0);
314 Check(IsValidSrcASrcBVector(Src1Ty),
315 "operand 1 must be 4, 6 or 8 element i32 vector", &
Call, Src1);
317 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ),
318 "invalid vector type for format", &
Call, Src0,
Call.getArgOperand(3));
319 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP),
320 "invalid vector type for format", &
Call, Src1,
Call.getArgOperand(5));
323 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
324 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
325 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
331 Check(FmtA <= 4,
"invalid value for matrix format",
Call,
332 Call.getArgOperand(0));
333 Check(FmtB <= 4,
"invalid value for matrix format",
Call,
334 Call.getArgOperand(2));
336 auto GetFormatNumRegs = [](
unsigned FormatVal) {
352 if (!Ty || !Ty->getElementType()->isIntegerTy(32))
354 unsigned NumElts = Ty->getNumElements();
355 return NumElts == 16 || NumElts == 12 || NumElts == 8;
360 Check(IsValidSrcASrcBVector(Src0Ty),
361 "operand 1 must be 8, 12 or 16 element i32 vector", &
Call, Src0);
362 Check(IsValidSrcASrcBVector(Src1Ty),
363 "operand 3 must be 8, 12 or 16 element i32 vector", &
Call, Src1);
365 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA),
366 "invalid vector type for format", &
Call, Src0,
Call.getArgOperand(0));
367 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB),
368 "invalid vector type for format", &
Call, Src1,
Call.getArgOperand(2));
371 case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
372 case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
373 case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
374 case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
375 case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
376 case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
380 "cooperative atomic intrinsics require a generic or global pointer",
387 "cooperative atomic intrinsics require that the last argument is a "
392 case Intrinsic::amdgcn_av_load_b128:
393 case Intrinsic::amdgcn_av_store_b128: {
398 "the last argument to av load/store intrinsics must be a "
AMDGPU address space definition.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS, const Function &F)
bool ult(const APInt &RHS) const
Unsigned less than comparison.
An instruction to allocate memory on the stack.
unsigned getAddressSpace() const
Return the address space for the allocation.
This class represents an incoming formal argument to a Function.
LLVM_ABI bool hasInRegAttr() const
Return true if this argument has the inreg attribute.
Functions, function parameters, and return types can have attributes to indicate how they should be treated by optimizations and code generation.
LLVM_ABI bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called, or Intrinsic::not_intrinsic if the called function is not an intrinsic or if this is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
Class to represent fixed width SIMD vectors.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Max
Takes the max of the two values, which are required to be integers.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isFlatGlobalAddrSpace(unsigned AS)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
void verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI)
bool isa_and_nonnull(const Y &Val)
void verifyAMDGPUFunctionMetadata(VerifierSupport &VS, const Function &F)
void verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID, CallBase &Call)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
void verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID, Module::ModFlagBehavior MFB, const MDNode *Op)
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0, the radix is auto-detected.
bool isAMDGPUCallBrIntrinsic(Intrinsic::ID ID)