LLVM 17.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22using namespace llvm;
23
24static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
31 }
32
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
35
36 return std::pair(Ty, *ArgAlign);
37}
38
39namespace llvm {
40
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
47
48namespace AMDGPU {
49namespace HSAMD {
50
51//===----------------------------------------------------------------------===//
52// HSAMetadataStreamerV2
53//===----------------------------------------------------------------------===//
54void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56}
57
58void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
60
61 HSAMD::Metadata FromHSAMetadataString;
62 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63 errs() << "FAIL\n";
64 return;
65 }
66
67 std::string ToHSAMetadataString;
68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69 errs() << "FAIL\n";
70 return;
71 }
72
73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74 << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << ToHSAMetadataString << '\n';
78 }
79}
80
82MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
83 if (AccQual.empty())
85
86 return StringSwitch<AccessQualifier>(AccQual)
87 .Case("read_only", AccessQualifier::ReadOnly)
88 .Case("write_only", AccessQualifier::WriteOnly)
89 .Case("read_write", AccessQualifier::ReadWrite)
91}
92
94MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
95 switch (AddressSpace) {
108 default:
110 }
111}
112
113ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
114 StringRef BaseTypeName) const {
115 if (TypeQual.contains("pipe"))
116 return ValueKind::Pipe;
117
118 return StringSwitch<ValueKind>(BaseTypeName)
119 .Case("image1d_t", ValueKind::Image)
120 .Case("image1d_array_t", ValueKind::Image)
121 .Case("image1d_buffer_t", ValueKind::Image)
122 .Case("image2d_t", ValueKind::Image)
123 .Case("image2d_array_t", ValueKind::Image)
124 .Case("image2d_array_depth_t", ValueKind::Image)
125 .Case("image2d_array_msaa_t", ValueKind::Image)
126 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
127 .Case("image2d_depth_t", ValueKind::Image)
128 .Case("image2d_msaa_t", ValueKind::Image)
129 .Case("image2d_msaa_depth_t", ValueKind::Image)
130 .Case("image3d_t", ValueKind::Image)
131 .Case("sampler_t", ValueKind::Sampler)
132 .Case("queue_t", ValueKind::Queue)
133 .Default(isa<PointerType>(Ty) ?
134 (Ty->getPointerAddressSpace() ==
139}
140
141std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
142 switch (Ty->getTypeID()) {
143 case Type::IntegerTyID: {
144 if (!Signed)
145 return (Twine('u') + getTypeName(Ty, true)).str();
146
147 auto BitWidth = Ty->getIntegerBitWidth();
148 switch (BitWidth) {
149 case 8:
150 return "char";
151 case 16:
152 return "short";
153 case 32:
154 return "int";
155 case 64:
156 return "long";
157 default:
158 return (Twine('i') + Twine(BitWidth)).str();
159 }
160 }
161 case Type::HalfTyID:
162 return "half";
163 case Type::FloatTyID:
164 return "float";
165 case Type::DoubleTyID:
166 return "double";
168 auto VecTy = cast<FixedVectorType>(Ty);
169 auto ElTy = VecTy->getElementType();
170 auto NumElements = VecTy->getNumElements();
171 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
172 }
173 default:
174 return "unknown";
175 }
176}
177
178std::vector<uint32_t>
179MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
180 std::vector<uint32_t> Dims;
181 if (Node->getNumOperands() != 3)
182 return Dims;
183
184 for (auto &Op : Node->operands())
185 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
186 return Dims;
187}
188
189Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
190 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
191 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
193 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
194 const Function &F = MF.getFunction();
195
196 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
197 F.getCallingConv() == CallingConv::SPIR_KERNEL);
198
199 Align MaxKernArgAlign;
200 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
201 MaxKernArgAlign);
202 HSACodeProps.mKernargSegmentAlign =
203 std::max(MaxKernArgAlign, Align(4)).value();
204
205 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
206 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
207 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
208 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
209 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
210 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
211 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
212 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
213 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
214 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
215
216 return HSACodeProps;
217}
218
219Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
220 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
221 return HSAMD::Kernel::DebugProps::Metadata();
222}
223
225 auto &Version = HSAMetadata.mVersion;
226
227 Version.push_back(VersionMajorV2);
228 Version.push_back(VersionMinorV2);
229}
230
231void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
232 auto &Printf = HSAMetadata.mPrintf;
233
234 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
235 if (!Node)
236 return;
237
238 for (auto *Op : Node->operands())
239 if (Op->getNumOperands())
240 Printf.push_back(
241 std::string(cast<MDString>(Op->getOperand(0))->getString()));
242}
243
244void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
245 auto &Kernel = HSAMetadata.mKernels.back();
246
247 // TODO: What about other languages?
248 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
249 if (!Node || !Node->getNumOperands())
250 return;
251 auto Op0 = Node->getOperand(0);
252 if (Op0->getNumOperands() <= 1)
253 return;
254
255 Kernel.mLanguage = "OpenCL C";
256 Kernel.mLanguageVersion.push_back(
257 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
258 Kernel.mLanguageVersion.push_back(
259 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
260}
261
262void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
263 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
264
265 if (auto Node = Func.getMetadata("reqd_work_group_size"))
266 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
267 if (auto Node = Func.getMetadata("work_group_size_hint"))
268 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
269 if (auto Node = Func.getMetadata("vec_type_hint")) {
270 Attrs.mVecTypeHint = getTypeName(
271 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
272 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
273 }
274 if (Func.hasFnAttribute("runtime-handle")) {
275 Attrs.mRuntimeHandle =
276 Func.getFnAttribute("runtime-handle").getValueAsString().str();
277 }
278}
279
280void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
281 const GCNSubtarget &ST) {
282 for (auto &Arg : Func.args())
283 emitKernelArg(Arg);
284
285 emitHiddenKernelArgs(Func, ST);
286}
287
288void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
289 auto Func = Arg.getParent();
290 auto ArgNo = Arg.getArgNo();
291 const MDNode *Node;
292
294 Node = Func->getMetadata("kernel_arg_name");
295 if (Node && ArgNo < Node->getNumOperands())
296 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
297 else if (Arg.hasName())
298 Name = Arg.getName();
299
301 Node = Func->getMetadata("kernel_arg_type");
302 if (Node && ArgNo < Node->getNumOperands())
303 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
304
305 StringRef BaseTypeName;
306 Node = Func->getMetadata("kernel_arg_base_type");
307 if (Node && ArgNo < Node->getNumOperands())
308 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
309
311 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
312 Arg.hasNoAliasAttr()) {
313 AccQual = "read_only";
314 } else {
315 Node = Func->getMetadata("kernel_arg_access_qual");
316 if (Node && ArgNo < Node->getNumOperands())
317 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
318 }
319
320 StringRef TypeQual;
321 Node = Func->getMetadata("kernel_arg_type_qual");
322 if (Node && ArgNo < Node->getNumOperands())
323 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
324
325 const DataLayout &DL = Func->getParent()->getDataLayout();
326
328 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
329 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
330 // FIXME: Should report this for all address spaces
331 PointeeAlign = Arg.getParamAlign().valueOrOne();
332 }
333 }
334
335 Type *ArgTy;
336 Align ArgAlign;
337 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
338
339 emitKernelArg(DL, ArgTy, ArgAlign,
340 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
341 TypeName, BaseTypeName, AccQual, TypeQual);
342}
343
344void MetadataStreamerYamlV2::emitKernelArg(
345 const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
346 MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName,
347 StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
348 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
349 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
350
351 Arg.mName = std::string(Name);
352 Arg.mTypeName = std::string(TypeName);
353 Arg.mSize = DL.getTypeAllocSize(Ty);
354 Arg.mAlign = Alignment.value();
355 Arg.mValueKind = ValueKind;
356 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
357
358 if (auto PtrTy = dyn_cast<PointerType>(Ty))
359 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
360
361 Arg.mAccQual = getAccessQualifier(AccQual);
362
363 // TODO: Emit Arg.mActualAccQual.
364
365 SmallVector<StringRef, 1> SplitTypeQuals;
366 TypeQual.split(SplitTypeQuals, " ", -1, false);
367 for (StringRef Key : SplitTypeQuals) {
368 auto P = StringSwitch<bool*>(Key)
369 .Case("const", &Arg.mIsConst)
370 .Case("restrict", &Arg.mIsRestrict)
371 .Case("volatile", &Arg.mIsVolatile)
372 .Case("pipe", &Arg.mIsPipe)
373 .Default(nullptr);
374 if (P)
375 *P = true;
376 }
377}
378
379void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
380 const GCNSubtarget &ST) {
381 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
382 if (!HiddenArgNumBytes)
383 return;
384
385 auto &DL = Func.getParent()->getDataLayout();
386 auto Int64Ty = Type::getInt64Ty(Func.getContext());
387
388 if (HiddenArgNumBytes >= 8)
389 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
390 if (HiddenArgNumBytes >= 16)
391 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
392 if (HiddenArgNumBytes >= 24)
393 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
394
395 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
397
398 if (HiddenArgNumBytes >= 32) {
399 // We forbid the use of features requiring hostcall when compiling OpenCL
400 // before code object V5, which makes the mutual exclusion between the
401 // "printf buffer" and "hostcall buffer" here sound.
402 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
403 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
404 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
405 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
406 else
407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
408 }
409
410 // Emit "default queue" and "completion action" arguments if enqueue kernel is
411 // used, otherwise emit dummy "none" arguments.
412 if (HiddenArgNumBytes >= 40) {
413 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
415 } else {
416 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
417 }
418 }
419
420 if (HiddenArgNumBytes >= 48) {
421 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
422 // FIXME: Hack for runtime bug if we fail to optimize this out
423 Func.hasFnAttribute("calls-enqueue-kernel")) {
424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
425 } else {
426 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
427 }
428 }
429
430 // Emit the pointer argument for multi-grid object.
431 if (HiddenArgNumBytes >= 56) {
432 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
433 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
434 else
435 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
436 }
437}
438
440 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
441}
442
444 const IsaInfo::AMDGPUTargetID &TargetID) {
445 emitVersion();
446 emitPrintf(Mod);
447}
448
450 std::string HSAMetadataString;
451 if (toString(HSAMetadata, HSAMetadataString))
452 return;
453
454 if (DumpHSAMetadata)
455 dump(HSAMetadataString);
457 verify(HSAMetadataString);
458}
459
461 const SIProgramInfo &ProgramInfo) {
462 auto &Func = MF.getFunction();
463 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
464 return;
465
466 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
467 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
468
469 HSAMetadata.mKernels.push_back(Kernel::Metadata());
470 auto &Kernel = HSAMetadata.mKernels.back();
471
472 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
473 Kernel.mName = std::string(Func.getName());
474 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
475 emitKernelLanguage(Func);
476 emitKernelAttrs(Func);
477 emitKernelArgs(Func, ST);
478 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
479 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
480}
481
482//===----------------------------------------------------------------------===//
483// HSAMetadataStreamerV3
484//===----------------------------------------------------------------------===//
485
486void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
487 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
488}
489
490void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
491 errs() << "AMDGPU HSA Metadata Parser Test: ";
492
493 msgpack::Document FromHSAMetadataString;
494
495 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
496 errs() << "FAIL\n";
497 return;
498 }
499
500 std::string ToHSAMetadataString;
501 raw_string_ostream StrOS(ToHSAMetadataString);
502 FromHSAMetadataString.toYAML(StrOS);
503
504 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
505 if (HSAMetadataString != ToHSAMetadataString) {
506 errs() << "Original input: " << HSAMetadataString << '\n'
507 << "Produced output: " << StrOS.str() << '\n';
508 }
509}
510
511std::optional<StringRef>
514 .Case("read_only", StringRef("read_only"))
515 .Case("write_only", StringRef("write_only"))
516 .Case("read_write", StringRef("read_write"))
517 .Default(std::nullopt);
518}
519
521 unsigned AddressSpace) const {
522 switch (AddressSpace) {
524 return StringRef("private");
526 return StringRef("global");
528 return StringRef("constant");
530 return StringRef("local");
532 return StringRef("generic");
534 return StringRef("region");
535 default:
536 return std::nullopt;
537 }
538}
539
542 StringRef BaseTypeName) const {
543 if (TypeQual.contains("pipe"))
544 return "pipe";
545
546 return StringSwitch<StringRef>(BaseTypeName)
547 .Case("image1d_t", "image")
548 .Case("image1d_array_t", "image")
549 .Case("image1d_buffer_t", "image")
550 .Case("image2d_t", "image")
551 .Case("image2d_array_t", "image")
552 .Case("image2d_array_depth_t", "image")
553 .Case("image2d_array_msaa_t", "image")
554 .Case("image2d_array_msaa_depth_t", "image")
555 .Case("image2d_depth_t", "image")
556 .Case("image2d_msaa_t", "image")
557 .Case("image2d_msaa_depth_t", "image")
558 .Case("image3d_t", "image")
559 .Case("sampler_t", "sampler")
560 .Case("queue_t", "queue")
561 .Default(isa<PointerType>(Ty)
563 ? "dynamic_shared_pointer"
564 : "global_buffer")
565 : "by_value");
566}
567
569 bool Signed) const {
570 switch (Ty->getTypeID()) {
571 case Type::IntegerTyID: {
572 if (!Signed)
573 return (Twine('u') + getTypeName(Ty, true)).str();
574
575 auto BitWidth = Ty->getIntegerBitWidth();
576 switch (BitWidth) {
577 case 8:
578 return "char";
579 case 16:
580 return "short";
581 case 32:
582 return "int";
583 case 64:
584 return "long";
585 default:
586 return (Twine('i') + Twine(BitWidth)).str();
587 }
588 }
589 case Type::HalfTyID:
590 return "half";
591 case Type::FloatTyID:
592 return "float";
593 case Type::DoubleTyID:
594 return "double";
596 auto VecTy = cast<FixedVectorType>(Ty);
597 auto ElTy = VecTy->getElementType();
598 auto NumElements = VecTy->getNumElements();
599 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
600 }
601 default:
602 return "unknown";
603 }
604}
605
608 auto Dims = HSAMetadataDoc->getArrayNode();
609 if (Node->getNumOperands() != 3)
610 return Dims;
611
612 for (auto &Op : Node->operands())
613 Dims.push_back(Dims.getDocument()->getNode(
614 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
615 return Dims;
616}
617
619 auto Version = HSAMetadataDoc->getArrayNode();
620 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
621 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
622 getRootMetadata("amdhsa.version") = Version;
623}
624
626 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
627 if (!Node)
628 return;
629
630 auto Printf = HSAMetadataDoc->getArrayNode();
631 for (auto *Op : Node->operands())
632 if (Op->getNumOperands())
633 Printf.push_back(Printf.getDocument()->getNode(
634 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
635 getRootMetadata("amdhsa.printf") = Printf;
636}
637
639 msgpack::MapDocNode Kern) {
640 // TODO: What about other languages?
641 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
642 if (!Node || !Node->getNumOperands())
643 return;
644 auto Op0 = Node->getOperand(0);
645 if (Op0->getNumOperands() <= 1)
646 return;
647
648 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
650 LanguageVersion.push_back(Kern.getDocument()->getNode(
651 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
652 LanguageVersion.push_back(Kern.getDocument()->getNode(
653 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
654 Kern[".language_version"] = LanguageVersion;
655}
656
658 msgpack::MapDocNode Kern) {
659
660 if (auto Node = Func.getMetadata("reqd_work_group_size"))
661 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
662 if (auto Node = Func.getMetadata("work_group_size_hint"))
663 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
664 if (auto Node = Func.getMetadata("vec_type_hint")) {
665 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
667 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
668 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
669 /*Copy=*/true);
670 }
671 if (Func.hasFnAttribute("runtime-handle")) {
672 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
673 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
674 /*Copy=*/true);
675 }
676 if (Func.hasFnAttribute("device-init"))
677 Kern[".kind"] = Kern.getDocument()->getNode("init");
678 else if (Func.hasFnAttribute("device-fini"))
679 Kern[".kind"] = Kern.getDocument()->getNode("fini");
680}
681
683 msgpack::MapDocNode Kern) {
684 auto &Func = MF.getFunction();
685 unsigned Offset = 0;
686 auto Args = HSAMetadataDoc->getArrayNode();
687 for (auto &Arg : Func.args())
688 emitKernelArg(Arg, Offset, Args);
689
690 emitHiddenKernelArgs(MF, Offset, Args);
691
692 Kern[".args"] = Args;
693}
694
696 unsigned &Offset,
698 auto Func = Arg.getParent();
699 auto ArgNo = Arg.getArgNo();
700 const MDNode *Node;
701
703 Node = Func->getMetadata("kernel_arg_name");
704 if (Node && ArgNo < Node->getNumOperands())
705 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
706 else if (Arg.hasName())
707 Name = Arg.getName();
708
709 StringRef TypeName;
710 Node = Func->getMetadata("kernel_arg_type");
711 if (Node && ArgNo < Node->getNumOperands())
712 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
713
714 StringRef BaseTypeName;
715 Node = Func->getMetadata("kernel_arg_base_type");
716 if (Node && ArgNo < Node->getNumOperands())
717 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
718
719 StringRef AccQual;
720 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
721 Arg.hasNoAliasAttr()) {
722 AccQual = "read_only";
723 } else {
724 Node = Func->getMetadata("kernel_arg_access_qual");
725 if (Node && ArgNo < Node->getNumOperands())
726 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
727 }
728
729 StringRef TypeQual;
730 Node = Func->getMetadata("kernel_arg_type_qual");
731 if (Node && ArgNo < Node->getNumOperands())
732 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
733
734 const DataLayout &DL = Func->getParent()->getDataLayout();
735
736 MaybeAlign PointeeAlign;
737 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
738
739 // FIXME: Need to distinguish in memory alignment from pointer alignment.
740 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
741 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
742 PointeeAlign = Arg.getParamAlign().valueOrOne();
743 }
744
745 // There's no distinction between byval aggregates and raw aggregates.
746 Type *ArgTy;
747 Align ArgAlign;
748 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
749
750 emitKernelArg(DL, ArgTy, ArgAlign,
751 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
752 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
753}
754
756 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
757 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
758 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
759 StringRef AccQual, StringRef TypeQual) {
760 auto Arg = Args.getDocument()->getMapNode();
761
762 if (!Name.empty())
763 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
764 if (!TypeName.empty())
765 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
766 auto Size = DL.getTypeAllocSize(Ty);
767 Arg[".size"] = Arg.getDocument()->getNode(Size);
768 Offset = alignTo(Offset, Alignment);
769 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
770 Offset += Size;
771 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
772 if (PointeeAlign)
773 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
774
775 if (auto PtrTy = dyn_cast<PointerType>(Ty))
776 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
777 // Limiting address space to emit only for a certain ValueKind.
778 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
779 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
780 /*Copy=*/true);
781
782 if (auto AQ = getAccessQualifier(AccQual))
783 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
784
785 // TODO: Emit Arg[".actual_access"].
786
787 SmallVector<StringRef, 1> SplitTypeQuals;
788 TypeQual.split(SplitTypeQuals, " ", -1, false);
789 for (StringRef Key : SplitTypeQuals) {
790 if (Key == "const")
791 Arg[".is_const"] = Arg.getDocument()->getNode(true);
792 else if (Key == "restrict")
793 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
794 else if (Key == "volatile")
795 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
796 else if (Key == "pipe")
797 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
798 }
799
800 Args.push_back(Arg);
801}
802
804 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
805 auto &Func = MF.getFunction();
806 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
807
808 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
809 if (!HiddenArgNumBytes)
810 return;
811
812 const Module *M = Func.getParent();
813 auto &DL = M->getDataLayout();
814 auto Int64Ty = Type::getInt64Ty(Func.getContext());
815
816 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
817
818 if (HiddenArgNumBytes >= 8)
819 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
820 Args);
821 if (HiddenArgNumBytes >= 16)
822 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
823 Args);
824 if (HiddenArgNumBytes >= 24)
825 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
826 Args);
827
828 auto Int8PtrTy =
830
831 if (HiddenArgNumBytes >= 32) {
832 // We forbid the use of features requiring hostcall when compiling OpenCL
833 // before code object V5, which makes the mutual exclusion between the
834 // "printf buffer" and "hostcall buffer" here sound.
835 if (M->getNamedMetadata("llvm.printf.fmts"))
836 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
837 Args);
838 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
840 Args);
841 else
842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843 }
844
845 // Emit "default queue" and "completion action" arguments if enqueue kernel is
846 // used, otherwise emit dummy "none" arguments.
847 if (HiddenArgNumBytes >= 40) {
848 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
850 Args);
851 } else {
852 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
853 }
854 }
855
856 if (HiddenArgNumBytes >= 48) {
857 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
858 // FIXME: Hack for runtime bug if we fail to optimize this out
859 Func.hasFnAttribute("calls-enqueue-kernel")) {
860 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
861 Args);
862 } else {
863 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
864 }
865 }
866
867 // Emit the pointer argument for multi-grid object.
868 if (HiddenArgNumBytes >= 56) {
869 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
870 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
871 Args);
872 } else {
873 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
874 }
875 }
876}
877
879 const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
880 unsigned CodeObjectVersion) const {
881 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
883 const Function &F = MF.getFunction();
884
885 auto Kern = HSAMetadataDoc->getMapNode();
886
887 Align MaxKernArgAlign;
888 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
889 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
890 Kern[".group_segment_fixed_size"] =
891 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
892 Kern[".private_segment_fixed_size"] =
893 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
894 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
895 Kern[".uses_dynamic_stack"] =
896 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
897
898 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
899 Kern[".workgroup_processor_mode"] =
900 Kern.getDocument()->getNode(ProgramInfo.WgpMode);
901
902 // FIXME: The metadata treats the minimum as 16?
903 Kern[".kernarg_segment_align"] =
904 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
905 Kern[".wavefront_size"] =
906 Kern.getDocument()->getNode(STM.getWavefrontSize());
907 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
908 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
909
910 // Only add AGPR count to metadata for supported devices
911 if (STM.hasMAIInsts()) {
912 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
913 }
914
915 Kern[".max_flat_workgroup_size"] =
916 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
917 Kern[".sgpr_spill_count"] =
918 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
919 Kern[".vgpr_spill_count"] =
920 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
921
922 return Kern;
923}
924
926 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
927}
928
930 const IsaInfo::AMDGPUTargetID &TargetID) {
931 emitVersion();
933 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
934}
935
937 std::string HSAMetadataString;
938 raw_string_ostream StrOS(HSAMetadataString);
939 HSAMetadataDoc->toYAML(StrOS);
940
941 if (DumpHSAMetadata)
942 dump(StrOS.str());
944 verify(StrOS.str());
945}
946
948 const SIProgramInfo &ProgramInfo) {
949 auto &Func = MF.getFunction();
950 auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent());
951 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
952
953 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
954 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
955
956 auto Kernels =
957 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
958
959 {
960 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
961 Kern[".symbol"] = Kern.getDocument()->getNode(
962 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
963 emitKernelLanguage(Func, Kern);
964 emitKernelAttrs(Func, Kern);
965 emitKernelArgs(MF, Kern);
966 }
967
968 Kernels.push_back(Kern);
969}
970
971//===----------------------------------------------------------------------===//
972// HSAMetadataStreamerV4
973//===----------------------------------------------------------------------===//
974
976 auto Version = HSAMetadataDoc->getArrayNode();
977 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
978 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
979 getRootMetadata("amdhsa.version") = Version;
980}
981
983 const IsaInfo::AMDGPUTargetID &TargetID) {
984 getRootMetadata("amdhsa.target") =
985 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
986}
987
989 const IsaInfo::AMDGPUTargetID &TargetID) {
990 emitVersion();
991 emitTargetID(TargetID);
993 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
994}
995
996//===----------------------------------------------------------------------===//
997// HSAMetadataStreamerV5
998//===----------------------------------------------------------------------===//
999
1001 auto Version = HSAMetadataDoc->getArrayNode();
1002 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
1003 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
1004 getRootMetadata("amdhsa.version") = Version;
1005}
1006
1008 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
1009 auto &Func = MF.getFunction();
1010 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1011
1012 // No implicit kernel argument is used.
1013 if (ST.getImplicitArgNumBytes(Func) == 0)
1014 return;
1015
1016 const Module *M = Func.getParent();
1017 auto &DL = M->getDataLayout();
1019
1020 auto Int64Ty = Type::getInt64Ty(Func.getContext());
1021 auto Int32Ty = Type::getInt32Ty(Func.getContext());
1022 auto Int16Ty = Type::getInt16Ty(Func.getContext());
1023
1024 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1025 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1026 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1027 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1028
1029 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1030 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1031 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1032
1033 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1034 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1035 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1036
1037 // Reserved for hidden_tool_correlation_id.
1038 Offset += 8;
1039
1040 Offset += 8; // Reserved.
1041
1042 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1043 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1044 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1045
1046 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1047
1048 Offset += 6; // Reserved.
1049 auto Int8PtrTy =
1050 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1051
1052 if (M->getNamedMetadata("llvm.printf.fmts")) {
1053 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1054 Args);
1055 } else {
1056 Offset += 8; // Skipped.
1057 }
1058
1059 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1060 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1061 Args);
1062 } else {
1063 Offset += 8; // Skipped.
1064 }
1065
1066 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1067 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1068 Args);
1069 } else {
1070 Offset += 8; // Skipped.
1071 }
1072
1073 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1074 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1075 else
1076 Offset += 8; // Skipped.
1077
1078 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
1079 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1080 Args);
1081 } else {
1082 Offset += 8; // Skipped.
1083 }
1084
1085 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
1086 // FIXME: Hack for runtime bug
1087 Func.hasFnAttribute("calls-enqueue-kernel")) {
1088 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1089 Args);
1090 } else {
1091 Offset += 8; // Skipped.
1092 }
1093
1094 Offset += 72; // Reserved.
1095
1096 // hidden_private_base and hidden_shared_base are only when the subtarget has
1097 // ApertureRegs.
1098 if (!ST.hasApertureRegs()) {
1099 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1100 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1101 } else {
1102 Offset += 8; // Skipped.
1103 }
1104
1105 if (MFI.hasQueuePtr())
1106 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1107}
1108
1110 msgpack::MapDocNode Kern) {
1112
1113 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
1114 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
1115}
1116
1117
1118} // end namespace HSAMD
1119} // end namespace AMDGPU
1120} // end namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
AMDGPU HSA Metadata Streamer.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Given that RA is a live value
std::string Name
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
#define F(x, y, z)
Definition: MD5.cpp:55
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
ppc ctr loops verify
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Defines struct to track resource usage and hardware flags for kernels and entry functions.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern)
std::string getTypeName(Type *Ty, bool Signed) const
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
std::unique_ptr< msgpack::Document > HSAMetadataDoc
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
std::optional< StringRef > getAccessQualifier(StringRef AccQual) const
void verify(StringRef HSAMetadataString) const
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
std::optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo, unsigned CodeObjectVersion) const
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID)
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool hasMAIInsts() const
Definition: GCNSubtarget.h:753
bool supportsWGP() const
Definition: GCNSubtarget.h:312
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:564
Metadata node.
Definition: Metadata.h:943
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:687
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:422
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
static IntegerType * getInt16Ty(LLVMContext &C)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A DocNode that is a map.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:660
unsigned LanguageVersion(SourceLanguage L)
Definition: Dwarf.cpp:346
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:378
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:381
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:380
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:376
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:377
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:382
constexpr char PointeeAlign[]
Key for Kernel::Arg::Metadata::mPointeeAlign.
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
constexpr char AccQual[]
Key for Kernel::Arg::Metadata::mAccQual.
std::error_code fromString(StringRef String, Metadata &HSAMetadata)
Converts String to HSAMetadata.
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
AddressSpaceQualifier
Address space qualifiers.
ValueKind
Value kinds.
std::error_code toString(Metadata HSAMetadata, std::string &String)
Converts HSAMetadata to String.
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
constexpr uint32_t VersionMajorV2
HSA metadata major version for code object V2.
constexpr uint32_t VersionMinorV2
HSA metadata minor version for code object V2.
AccessQualifier
Access qualifiers.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getCodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
AddressSpace
Definition: NVPTXBaseInfo.h:21
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
Definition: TypeName.h:27
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
In-memory representation of kernel metadata.
std::vector< Kernel::Metadata > mKernels
Kernels metadata. Required.
std::vector< uint32_t > mVersion
HSA metadata version. Required.
std::vector< std::string > mPrintf
Printf metadata. Optional.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25