LLVM 23.0.0git
VerifierAMDGPU.cpp
Go to the documentation of this file.
1//===-- VerifierAMDGPU.cpp - AMDGPU-specific IR verification ---------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains AMDGPU-specific IR verification logic that was extracted
10// from Verifier.cpp for code organization purposes only. These checks are
11// always compiled and linked as part of LLVMCore — this is not a target-
12// dependent IR verifier, which would require a different design.
13//
14// This file should only contain checks for AMDGPU-specific IR constructs
15// (e.g. amdgcn intrinsics, AMDGPU address spaces). It must not contain
16// checks for generic IR that might behave differently under AMDGPU.
17//
18//===----------------------------------------------------------------------===//
19
20#include "VerifierInternal.h"
22#include "llvm/IR/CallingConv.h"
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/Function.h"
27#include "llvm/IR/IntrinsicsAMDGPU.h"
29
30using namespace llvm;
31
// Verification helper used by the checks in this file: when condition C is
// false, the remaining macro arguments are forwarded to VS.CheckFailed and
// the enclosing function returns immediately. The macro therefore expects a
// variable named VS to be in scope and can only be used where a plain
// `return;` is valid. The do/while(false) wrapper makes the expansion behave
// as a single statement.
32#define Check(C, ...) \
33 do { \
34 if (!(C)) { \
35 VS.CheckFailed(__VA_ARGS__); \
36 return; \
37 } \
38 } while (false)
39
// Checks the "amdgpu.buffer.oob.mode" and "amdgpu.tbuffer.oob.mode" module
// flags; any other flag name is ignored. For these two flags the merge
// behaviour (MFB) must be Module::Max and the flag value must be an integer
// in the range [0, 2]. The first failing check is reported via VS.CheckFailed
// and ends verification of this flag.
// NOTE(review): only the tail of the parameter list is visible here; the
// leading parameters are presumably (VerifierSupport &VS, const MDString *ID,
// Module::ModFlagBehavior MFB, ...) — confirm against the declaration.
42 const MDNode *Op) {
  // Flags with any other name are not this function's concern.
43 if (ID->getString() != "amdgpu.buffer.oob.mode" &&
44 ID->getString() != "amdgpu.tbuffer.oob.mode")
45 return;
46
47 Check(MFB == Module::Max,
48 "'" + ID->getString() + "' module flag must use 'max' merge behaviour");
  // NOTE(review): `Value` is declared on a line not visible here; presumably
  // it is the flag's value operand extracted from Op as a ConstantInt, null
  // when the operand is not a constant integer — confirm.
51 Check(Value, "'" + ID->getString() +
52 "' module flag must have a constant integer value");
53 Check(Value->getZExtValue() <= 2,
54 "'" + ID->getString() + "' module flag must be 0, 1, or 2");
55}
56
57// Verify that when a function has !reqd_work_group_size metadata, it also has
58// an amdgpu-flat-work-group-size attribute that matches the product of the
59// reqd_work_group_size operands.
//
// The check is skipped (no diagnostic) when the target is not AMDGPU, when
// the metadata is absent or does not have exactly three operands, when an
// operand cannot be used as an unsigned 64-bit constant, or when the product
// of the three dimensions would overflow uint64_t. Otherwise the attribute
// must exist, be a string attribute of the form "<min>,<max>", and both
// numbers must equal the product.
61 const Function &F) {
62 // This is not required for other targets so we only check for AMDGPU.
63 if (!VS.TT.isAMDGPU())
64 return;
65
66 MDNode *ReqdWorkGroupSize = F.getMetadata("reqd_work_group_size");
67 if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3)
68 return;
69
  // Multiply the three dimensions, bailing out silently on anything that
  // cannot be represented.
70 uint64_t Product = 1;
71 for (const MDOperand &Op : ReqdWorkGroupSize->operands()) {
  // NOTE(review): `C` is declared on a line not visible here; presumably it
  // is the operand extracted as a ConstantInt (null otherwise) — confirm.
73 if (!C || C->getValue().getActiveBits() > 64)
74 return;
75 uint64_t Dim = C->getZExtValue();
  // Overflow guard: Product * Dim must fit in uint64_t. A zero dimension is
  // allowed through and makes the product zero.
76 if (Dim != 0 && Product > std::numeric_limits<uint64_t>::max() / Dim)
77 return;
78 Product *= Dim;
79 }
80
81 Attribute FlatWorkGroupSize = F.getFnAttribute("amdgpu-flat-work-group-size");
82 if (!FlatWorkGroupSize.isValid()) {
83 VS.CheckFailed("reqd_work_group_size requires amdgpu-flat-work-group-size",
84 &F, ReqdWorkGroupSize);
85 return;
86 }
87
88 if (!FlatWorkGroupSize.isStringAttribute()) {
89 VS.CheckFailed("amdgpu-flat-work-group-size must be a string attribute",
90 &F);
91 return;
92 }
93
  // Parse "<min>,<max>": split at the first comma, reject a second comma,
  // and require both trimmed halves to parse as unsigned integers. A value
  // without any comma leaves the second half empty, which fails to parse.
94 StringRef AttrValue = FlatWorkGroupSize.getValueAsString();
95 std::pair<StringRef, StringRef> Values = AttrValue.split(',');
96 uint64_t Min = 0;
97 uint64_t Max = 0;
98 bool Parsed = !Values.second.contains(',') &&
99 llvm::to_integer(Values.first.trim(), Min) &&
100 llvm::to_integer(Values.second.trim(), Max);
101 if (!Parsed) {
102 VS.CheckFailed("amdgpu-flat-work-group-size must be a pair of unsigned "
103 "integers",
104 &F);
105 return;
106 }
107
  // Both bounds must be exactly the required work-group size.
108 if (Min != Product || Max != Product) {
109 VS.CheckFailed("amdgpu-flat-work-group-size must equal the product of "
110 "reqd_work_group_size operands",
111 &F, ReqdWorkGroupSize);
112 }
113}
114
119
// AMDGPU-only check on an alloca instruction: reports "alloca on amdgpu must
// be in addrspace(5)" through VS.CheckFailed when the (not visible here)
// condition holds; does nothing for non-AMDGPU targets.
121 // This is not required for other targets so we only check for AMDGPU.
122 if (!VS.TT.isAMDGPU())
123 return;
124
  // NOTE(review): the guarding `if` is on a line not visible here; presumably
  // it tests that AI's address space is not the private address space —
  // confirm against the full source.
126 VS.CheckFailed("alloca on amdgpu must be in addrspace(5)", &AI);
127}
128
// Returns true only for Intrinsic::amdgcn_kill and false for every other
// intrinsic ID.
// NOTE(review): judging by the name, this is the set of AMDGPU intrinsics
// accepted as a callbr callee — confirm at the call site.
130 switch (ID) {
131 default:
132 return false;
133 case Intrinsic::amdgcn_kill:
134 return true;
135 }
136}
137
// Runs the AMDGPU-specific checks for a call to the intrinsic identified by
// ID. Intrinsic IDs without a case below are accepted without any check.
// Checks written with the Check macro return from this function on the first
// failure, so at most one such diagnostic is emitted per call; the direct
// VS.CheckFailed calls in the calling-convention switches do not return and
// let the remaining checks of that case run.
139 CallBase &Call) {
140 switch (ID) {
141 default:
142 return;
  // amdgcn_kill is only constrained when used through callbr: exactly one
  // indirect destination, and the instruction at the start of that block
  // must be `unreachable` or a call to llvm.amdgcn.unreachable.
143 case Intrinsic::amdgcn_kill: {
144 if (auto *CBI = dyn_cast<CallBrInst>(&Call)) {
145 Check(CBI->getNumIndirectDests() == 1,
146 "callbr amdgcn_kill only supports one indirect dest");
147 bool Unreachable = isa<UnreachableInst>(CBI->getIndirectDest(0)->begin());
148 CallInst *CI = dyn_cast<CallInst>(CBI->getIndirectDest(0)->begin());
149 Check(Unreachable ||
150 (CI && CI->getIntrinsicID() == Intrinsic::amdgcn_unreachable),
151 "callbr amdgcn_kill indirect dest needs to be unreachable");
152 }
153 break;
154 }
  // llvm.amdgcn.cs.chain: restricted caller calling conventions, operand 2
  // must be inreg, operand 3 must not be, operand 4 (flags) must be 0 or 1,
  // and the call must be immediately followed by `unreachable` or
  // llvm.amdgcn.unreachable.
155 case Intrinsic::amdgcn_cs_chain: {
156 CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
157 switch (CallerCC) {
  // NOTE(review): the case labels listing the accepted calling conventions
  // precede this `break` but are not visible here — confirm against the
  // full source.
166 break;
167 default:
168 VS.CheckFailed("Intrinsic cannot be called from functions with this "
169 "calling convention",
170 &Call);
171 break;
172 }
173
174 Check(Call.paramHasAttr(2, Attribute::InReg),
175 "SGPR arguments must have the `inreg` attribute", &Call);
176 Check(!Call.paramHasAttr(3, Attribute::InReg),
177 "VGPR arguments must not have the `inreg` attribute", &Call);
178
  // cast<> (not dyn_cast<>) is used here, i.e. operand 4 is assumed to
  // already be a ConstantInt — presumably guaranteed by the intrinsic's
  // immarg declaration; confirm.
179 ConstantInt *FlagsArg = cast<ConstantInt>(Call.getArgOperand(4));
180 Check(FlagsArg->getValue().ult(2),
181 "flags must be 0 or 1 for llvm.amdgcn.cs.chain", &Call);
182
183 Instruction *Next = Call.getNextNode();
184 bool IsAMDUnreachable = isa_and_nonnull<IntrinsicInst>(Next) &&
185 cast<IntrinsicInst>(Next)->getIntrinsicID() ==
186 Intrinsic::amdgcn_unreachable;
187 Check(Next && (isa<UnreachableInst>(Next) || IsAMDUnreachable),
188 "llvm.amdgcn.cs.chain must be followed by unreachable", &Call);
189 break;
190 }
  // Operand 0 must be one of the caller's own formal arguments, and that
  // argument must carry the inreg attribute.
191 case Intrinsic::amdgcn_init_exec_from_input: {
192 const Argument *Arg = dyn_cast<Argument>(Call.getOperand(0));
193 Check(Arg && Arg->hasInRegAttr(),
194 "only inreg arguments to the parent function are valid as inputs to "
195 "this intrinsic",
196 &Call);
197 break;
198 }
  // llvm.amdgcn.set.inactive.chain.arg: restricted caller calling
  // conventions, and operand 1 (the value for inactive lanes) must be a
  // non-inreg formal argument passed without inreg at the call.
199 case Intrinsic::amdgcn_set_inactive_chain_arg: {
200 CallingConv::ID CallerCC = Call.getCaller()->getCallingConv();
201 switch (CallerCC) {
  // NOTE(review): the accepted case labels are not visible here; per the
  // diagnostic below they are presumably the amdgpu_cs_chain and
  // amdgpu_cs_chain_preserve conventions — confirm.
204 break;
205 default:
206 VS.CheckFailed("Intrinsic can only be used from functions with the "
207 "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
208 "calling conventions",
209 &Call);
210 break;
211 }
212
213 unsigned InactiveIdx = 1;
214 Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
215 "Value for inactive lanes must not have the `inreg` attribute",
216 &Call);
  // The isa<> check must stay ahead of the cast<> on the following Check;
  // Check returns on failure, so the cast is only reached for an Argument.
217 Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
218 "Value for inactive lanes must be a function argument", &Call);
219 Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
220 "Value for inactive lanes must be a VGPR function argument", &Call);
221 break;
222 }
  // llvm.amdgcn.call.whole.wave: operand 0 must be a direct Function
  // reference; the callee must be non-variadic, take as many parameters as
  // the call has operands, and have i1 as its first parameter. The remaining
  // operands must match the callee parameters in type and inreg-ness.
223 case Intrinsic::amdgcn_call_whole_wave: {
224 Function *F = dyn_cast<Function>(Call.getArgOperand(0));
225 Check(F, "Indirect whole wave calls are not allowed", &Call);
226
227 CallingConv::ID CC = F->getCallingConv();
  // NOTE(review): the first line of the next Check (its condition on CC) is
  // not visible here; only the diagnostic text is.
229 "Callee must have the amdgpu_gfx_whole_wave calling convention",
230 &Call);
231
232 Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
233
234 Check(Call.arg_size() == F->arg_size(),
235 "Call argument count must match callee argument count", &Call);
236
237 Check(F->arg_begin()->getType()->isIntegerTy(1),
238 "Callee must have i1 as its first argument", &Call);
  // drop_begin skips the first pair: call operand 0 is the callee itself and
  // lines up with the callee's leading i1 parameter checked above.
239 for (auto [CallArg, FuncArg] :
240 drop_begin(zip_equal(Call.args(), F->args()))) {
241 Check(CallArg->getType() == FuncArg.getType(),
242 "Argument types must match", &Call);
243
244 Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
245 FuncArg.hasInRegAttr(),
246 "Argument inreg attributes must match", &Call);
247 }
248 break;
249 }
  // NOTE(review): the predicate applied to operand 0's pointer address space
  // is on a line not visible here; per the diagnostic it accepts global or
  // constant memory only — confirm.
250 case Intrinsic::amdgcn_s_prefetch_data: {
251 Check(
253 Call.getArgOperand(0)->getType()->getPointerAddressSpace()),
254 "llvm.amdgcn.s.prefetch.data only supports global or constant memory");
255 break;
256 }
  // Load-to-LDS family: operand 2 is the data size and must be one of
  // 1, 2, 4, 12 or 16.
257 case Intrinsic::amdgcn_load_to_lds:
258 case Intrinsic::amdgcn_load_async_to_lds:
259 case Intrinsic::amdgcn_global_load_lds:
260 case Intrinsic::amdgcn_global_load_async_lds:
261 case Intrinsic::amdgcn_raw_buffer_load_lds:
262 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
263 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
264 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
265 case Intrinsic::amdgcn_struct_buffer_load_lds:
266 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
267 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
268 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: {
269 uint64_t Size = cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue();
270 Check(Size == 1 || Size == 2 || Size == 4 || Size == 12 || Size == 16,
271 "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, "
272 "or 16",
273 &Call);
274 break;
275 }
  // Scaled MFMA f8f6f4: operands 0/1 are the sources, operand 3 is the cbsz
  // format and operand 4 the blgp format. Both formats must be <= 4, both
  // sources must be i32 vectors of 4, 6 or 8 elements, and each source must
  // have at least as many elements as its format requires.
276 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
277 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
278 Value *Src0 = Call.getArgOperand(0);
279 Value *Src1 = Call.getArgOperand(1);
280
281 uint64_t CBSZ = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
282 uint64_t BLGP = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
  // These range checks also keep GetFormatNumRegs below from reaching its
  // llvm_unreachable default.
283 Check(CBSZ <= 4, "invalid value for cbsz format", Call,
284 Call.getArgOperand(3));
285 Check(BLGP <= 4, "invalid value for blgp format", Call,
286 Call.getArgOperand(4));
287
  // Minimum number of i32 elements required for a given format value.
288 auto GetFormatNumRegs = [](unsigned FormatVal) {
289 switch (FormatVal) {
290 case 0:
291 case 1:
292 return 8u;
293 case 2:
294 case 3:
295 return 6u;
296 case 4:
297 return 4u;
298 default:
299 llvm_unreachable("invalid format value");
300 }
301 };
302
  // Accepts only a non-null fixed vector of i32 with 4, 6 or 8 elements.
303 auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
304 if (!Ty || !Ty->getElementType()->isIntegerTy(32))
305 return false;
306 unsigned NumElts = Ty->getNumElements();
307 return NumElts == 4 || NumElts == 6 || NumElts == 8;
308 };
309
  // NOTE(review): Src0Ty and Src1Ty are declared on lines not visible here;
  // presumably the types of Src0/Src1 dyn_cast to FixedVectorType — confirm.
312 Check(IsValidSrcASrcBVector(Src0Ty),
313 "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0);
314 Check(IsValidSrcASrcBVector(Src1Ty),
315 "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1);
316
317 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ),
318 "invalid vector type for format", &Call, Src0, Call.getArgOperand(3));
  // NOTE(review): BLGP is read from operand 4 above, but the diagnostic
  // below attaches operand 5 — looks like it should be operand 4; confirm.
319 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP),
320 "invalid vector type for format", &Call, Src1, Call.getArgOperand(5));
321 break;
322 }
  // WMMA f8f6f4 variants: operand 0 is the format of source A (operand 1)
  // and operand 2 the format of source B (operand 3). Both formats must be
  // <= 4, both sources must be i32 vectors of 8, 12 or 16 elements, and each
  // source must have at least as many elements as its format requires.
323 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
324 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
325 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
326 Value *Src0 = Call.getArgOperand(1);
327 Value *Src1 = Call.getArgOperand(3);
328
329 unsigned FmtA = cast<ConstantInt>(Call.getArgOperand(0))->getZExtValue();
330 unsigned FmtB = cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue();
331 Check(FmtA <= 4, "invalid value for matrix format", Call,
332 Call.getArgOperand(0));
333 Check(FmtB <= 4, "invalid value for matrix format", Call,
334 Call.getArgOperand(2));
335
  // Minimum number of i32 elements required for a given format value.
336 auto GetFormatNumRegs = [](unsigned FormatVal) {
337 switch (FormatVal) {
338 case 0:
339 case 1:
340 return 16u;
341 case 2:
342 case 3:
343 return 12u;
344 case 4:
345 return 8u;
346 default:
347 llvm_unreachable("invalid format value");
348 }
349 };
350
  // Accepts only a non-null fixed vector of i32 with 8, 12 or 16 elements.
351 auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) {
352 if (!Ty || !Ty->getElementType()->isIntegerTy(32))
353 return false;
354 unsigned NumElts = Ty->getNumElements();
355 return NumElts == 16 || NumElts == 12 || NumElts == 8;
356 };
357
  // NOTE(review): Src0Ty and Src1Ty are declared on lines not visible here;
  // presumably the types of Src0/Src1 dyn_cast to FixedVectorType — confirm.
360 Check(IsValidSrcASrcBVector(Src0Ty),
361 "operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0);
362 Check(IsValidSrcASrcBVector(Src1Ty),
363 "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1);
364
365 Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA),
366 "invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
367 Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB),
368 "invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
369 break;
370 }
  // Cooperative atomics: operand 0 is the pointer, and the last operand must
  // be metadata wrapping a node whose single operand is an MDString.
371 case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
372 case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
373 case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
374 case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
375 case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
376 case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
377 Value *PtrArg = Call.getArgOperand(0);
378 const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
  // NOTE(review): the condition on AS is on a line not visible here; per the
  // diagnostic it accepts generic (flat) or global pointers — confirm.
380 "cooperative atomic intrinsics require a generic or global pointer",
381 &Call, PtrArg);
382
  // NOTE(review): the declaration of `Op` starts on a line not visible here;
  // the visible initializer is the last call operand as MetadataAsValue.
384 cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
  // NOTE(review): this uses an unchecked cast<MDNode>, whereas the av
  // load/store case below uses dyn_cast<MDNode> plus a null check — confirm
  // that non-MDNode metadata cannot reach this point.
385 MDNode *MD = cast<MDNode>(Op->getMetadata());
386 Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
387 "cooperative atomic intrinsics require that the last argument is a "
388 "metadata string",
389 &Call, Op);
390 break;
391 }
  // av load/store b128: the last operand must be metadata wrapping a node
  // whose single operand is an MDString; metadata that is not an MDNode is
  // diagnosed rather than assumed away.
392 case Intrinsic::amdgcn_av_load_b128:
393 case Intrinsic::amdgcn_av_store_b128: {
  // NOTE(review): the declaration of `Op` starts on a line not visible here.
395 cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
396 MDNode *MD = dyn_cast<MDNode>(Op->getMetadata());
397 Check(MD && (MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
398 "the last argument to av load/store intrinsics must be a "
399 "metadata string",
400 &Call, Op);
401 break;
402 }
403 }
404}
405
406#undef Check
AMDGPU address space definition.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition MD5.cpp:54
This file contains some functions that are useful when dealing with strings.
#define Check(C,...)
static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS, const Function &F)
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
an instruction to allocate memory on the stack
unsigned getAddressSpace() const
Return the address space for the allocation.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
LLVM_ABI bool hasInRegAttr() const
Return true if this argument has the inreg attribute.
Definition Function.cpp:288
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
Class to represent fixed width SIMD vectors.
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
A single uniqued string.
Definition Metadata.h:722
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isFlatGlobalAddrSpace(unsigned AS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
void verifyAMDGPUFunctionMetadata(VerifierSupport &VS, const Function &F)
void verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID, CallBase &Call)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID, Module::ModFlagBehavior MFB, const MDNode *Op)
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
bool isAMDGPUCallBrIntrinsic(Intrinsic::ID ID)