36 ArgAlign =
DL.getABITypeAlign(Ty);
38 return std::pair(Ty, *ArgAlign);
44 "amdgpu-dump-hsa-metadata",
45 cl::desc(
"Dump AMDGPU HSA Metadata"));
47 "amdgpu-verify-hsa-metadata",
48 cl::desc(
"Verify AMDGPU HSA Metadata"));
58 errs() <<
"AMDGPU HSA Metadata:\n" << HSAMetadataString <<
'\n';
62 errs() <<
"AMDGPU HSA Metadata Parser Test: ";
66 if (!FromHSAMetadataString.
fromYAML(HSAMetadataString)) {
71 std::string ToHSAMetadataString;
73 FromHSAMetadataString.
toYAML(StrOS);
75 errs() << (HSAMetadataString == StrOS.
str() ?
"PASS" :
"FAIL") <<
'\n';
76 if (HSAMetadataString != ToHSAMetadataString) {
77 errs() <<
"Original input: " << HSAMetadataString <<
'\n'
78 <<
"Produced output: " << StrOS.
str() <<
'\n';
82std::optional<StringRef>
85 .Case(
"read_only",
StringRef(
"read_only"))
86 .Case(
"write_only",
StringRef(
"write_only"))
87 .Case(
"read_write",
StringRef(
"read_write"))
118 .
Case(
"image1d_t",
"image")
119 .
Case(
"image1d_array_t",
"image")
120 .
Case(
"image1d_buffer_t",
"image")
121 .
Case(
"image2d_t",
"image")
122 .
Case(
"image2d_array_t",
"image")
123 .
Case(
"image2d_array_depth_t",
"image")
124 .
Case(
"image2d_array_msaa_t",
"image")
125 .
Case(
"image2d_array_msaa_depth_t",
"image")
126 .
Case(
"image2d_depth_t",
"image")
127 .
Case(
"image2d_msaa_t",
"image")
128 .
Case(
"image2d_msaa_depth_t",
"image")
129 .
Case(
"image3d_t",
"image")
130 .
Case(
"sampler_t",
"sampler")
131 .
Case(
"queue_t",
"queue")
134 ?
"dynamic_shared_pointer"
167 auto VecTy = cast<FixedVectorType>(Ty);
168 auto ElTy = VecTy->getElementType();
169 auto NumElements = VecTy->getNumElements();
180 if (Node->getNumOperands() != 3)
183 for (
auto &
Op : Node->operands())
184 Dims.push_back(Dims.getDocument()->getNode(
185 uint64_t(mdconst::extract<ConstantInt>(
Op)->getZExtValue())));
191 Version.push_back(Version.getDocument()->getNode(
VersionMajorV4));
192 Version.push_back(Version.getDocument()->getNode(
VersionMinorV4));
203 auto Node =
Mod.getNamedMetadata(
"llvm.printf.fmts");
208 for (
auto *
Op : Node->operands())
210 Printf.push_back(Printf.getDocument()->getNode(
211 cast<MDString>(
Op->getOperand(0))->getString(),
true));
218 auto Node = Func.getParent()->getNamedMetadata(
"opencl.ocl.version");
219 if (!Node || !Node->getNumOperands())
221 auto Op0 = Node->getOperand(0);
222 if (Op0->getNumOperands() <= 1)
228 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
230 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
237 if (
auto Node = Func.getMetadata(
"reqd_work_group_size"))
239 if (
auto Node = Func.getMetadata(
"work_group_size_hint"))
241 if (
auto Node = Func.getMetadata(
"vec_type_hint")) {
244 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
245 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
248 if (Func.hasFnAttribute(
"runtime-handle")) {
250 Func.getFnAttribute(
"runtime-handle").getValueAsString().str(),
253 if (Func.hasFnAttribute(
"device-init"))
255 else if (Func.hasFnAttribute(
"device-fini"))
264 for (
auto &Arg : Func.args())
269 Kern[
".args"] = Args;
280 Node = Func->getMetadata(
"kernel_arg_name");
281 if (Node && ArgNo < Node->getNumOperands())
282 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
287 Node = Func->getMetadata(
"kernel_arg_type");
288 if (Node && ArgNo < Node->getNumOperands())
289 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
292 Node = Func->getMetadata(
"kernel_arg_base_type");
293 if (Node && ArgNo < Node->getNumOperands())
294 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
300 ActAccQual =
"read_only";
302 ActAccQual =
"write_only";
306 Node = Func->getMetadata(
"kernel_arg_access_qual");
307 if (Node && ArgNo < Node->getNumOperands())
308 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
311 Node = Func->getMetadata(
"kernel_arg_type_qual");
312 if (Node && ArgNo < Node->getNumOperands())
313 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
315 const DataLayout &
DL = Func->getParent()->getDataLayout();
321 if (
auto PtrTy = dyn_cast<PointerType>(Ty)) {
333 PointeeAlign,
Name, TypeName, BaseTypeName, ActAccQual,
342 auto Arg = Args.getDocument()->getMapNode();
345 Arg[
".name"] = Arg.getDocument()->getNode(
Name,
true);
346 if (!TypeName.empty())
347 Arg[
".type_name"] = Arg.getDocument()->getNode(TypeName,
true);
348 auto Size =
DL.getTypeAllocSize(Ty);
349 Arg[
".size"] = Arg.getDocument()->getNode(
Size);
351 Arg[
".offset"] = Arg.getDocument()->getNode(
Offset);
353 Arg[
".value_kind"] = Arg.getDocument()->getNode(
ValueKind,
true);
355 Arg[
".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
357 if (
auto PtrTy = dyn_cast<PointerType>(Ty))
361 Arg[
".address_space"] = Arg.getDocument()->getNode(*Qualifier,
365 Arg[
".access"] = Arg.getDocument()->getNode(*AQ,
true);
368 Arg[
".actual_access"] = Arg.getDocument()->getNode(*AAQ,
true);
371 TypeQual.
split(SplitTypeQuals,
" ", -1,
false);
374 Arg[
".is_const"] = Arg.getDocument()->getNode(
true);
375 else if (Key ==
"restrict")
376 Arg[
".is_restrict"] = Arg.getDocument()->getNode(
true);
377 else if (Key ==
"volatile")
378 Arg[
".is_volatile"] = Arg.getDocument()->getNode(
true);
379 else if (Key ==
"pipe")
380 Arg[
".is_pipe"] = Arg.getDocument()->getNode(
true);
391 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
392 if (!HiddenArgNumBytes)
395 const Module *M = Func.getParent();
396 auto &
DL = M->getDataLayout();
401 if (HiddenArgNumBytes >= 8)
404 if (HiddenArgNumBytes >= 16)
407 if (HiddenArgNumBytes >= 24)
414 if (HiddenArgNumBytes >= 32) {
418 if (M->getNamedMetadata(
"llvm.printf.fmts"))
421 else if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr"))
430 if (HiddenArgNumBytes >= 40) {
431 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
439 if (HiddenArgNumBytes >= 48) {
440 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
449 if (HiddenArgNumBytes >= 56) {
450 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
462 unsigned CodeObjectVersion)
const {
469 if (!
Value->evaluateAsAbsolute(Val)) {
479 Align MaxKernArgAlign;
480 Kern[
".kernarg_segment_size"] = Kern.getDocument()->getNode(
482 Kern[
".group_segment_fixed_size"] =
483 Kern.getDocument()->getNode(ProgramInfo.
LDSSize);
484 Kern[
".private_segment_fixed_size"] =
485 Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.
ScratchSize));
487 Kern[
".uses_dynamic_stack"] = Kern.getDocument()->getNode(
492 Kern[
".workgroup_processor_mode"] =
493 Kern.getDocument()->getNode(ProgramInfo.
WgpMode);
496 Kern[
".kernarg_segment_align"] =
497 Kern.getDocument()->getNode(std::max(
Align(4), MaxKernArgAlign).
value());
498 Kern[
".wavefront_size"] =
500 Kern[
".sgpr_count"] =
501 Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.
NumSGPR));
502 Kern[
".vgpr_count"] =
503 Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.
NumVGPR));
507 Kern[
".agpr_count"] =
508 Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.
NumAccVGPR));
511 Kern[
".max_flat_workgroup_size"] =
512 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
513 unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
514 unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
515 unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
516 if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
517 Kern[
".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
518 Kern[
".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
519 Kern[
".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
521 Kern[
".sgpr_spill_count"] =
522 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
523 Kern[
".vgpr_spill_count"] =
524 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
542 std::string HSAMetadataString;
559 auto CodeObjectVersion =
569 (
Twine(Func.getName()) +
Twine(
".kd")).str(),
true);
575 Kernels.push_back(Kern);
584 Version.push_back(Version.getDocument()->getNode(
VersionMajorV5));
585 Version.push_back(Version.getDocument()->getNode(
VersionMinorV5));
595 if (ST.getImplicitArgNumBytes(Func) == 0)
598 const Module *M = Func.getParent();
599 auto &
DL = M->getDataLayout();
634 if (M->getNamedMetadata(
"llvm.printf.fmts")) {
641 if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr")) {
648 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
655 if (!Func.hasFnAttribute(
"amdgpu-no-heap-ptr"))
660 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
667 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
686 if (!ST.hasApertureRegs()) {
701 if (Func.getFnAttribute(
"uniform-work-group-size").getValueAsBool())
711 Version.push_back(Version.getDocument()->getNode(
VersionMajorV6));
712 Version.push_back(Version.getDocument()->getNode(
VersionMinorV6));
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Given that RA is a live value
AMD GCN specific subclass of TargetSubtarget.
Module.h This file contains the declarations for the Module class.
Defines struct to track resource usage and hardware flags for kernels and entry functions.
bool isDynamicLDSUsed() const
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
std::string toString() const
This class represents an incoming formal argument to a Function.
Type * getParamByRefType() const
If this is a byref argument, return its type.
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
bool hasByRefAttr() const
Return true if this argument has the byref attribute.
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
bool hasAttribute(Attribute::AttrKind Kind) const
Check if an argument has a given attribute.
const Function * getParent() const
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
Context object for machine code objects.
void reportError(SMLoc L, const Twine &Msg)
Base class for the full range of assembler expressions which are needed for parsing.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A Module instance is used to store all the information related to an LLVM module.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
Represents a location in source code.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ DoubleTyID
64-bit floating point type
static IntegerType * getInt16Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
unsigned LanguageVersion(SourceLanguage L)
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV6
HSA metadata minor version for code object V6.
constexpr uint32_t VersionMajorV6
HSA metadata major version for code object V6.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getAMDHSACodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr unsigned BitWidth
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Track resource usage for kernels / entry functions.
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * ScratchSize