LLVM  12.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25  const DataLayout &DL) {
26  Type *Ty = Arg.getType();
27  MaybeAlign ArgAlign;
28  if (Arg.hasByRefAttr()) {
29  Ty = Arg.getParamByRefType();
30  ArgAlign = Arg.getParamAlign();
31  }
32 
33  if (!ArgAlign)
34  ArgAlign = DL.getABITypeAlign(Ty);
35 
36  return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
42  "amdgpu-dump-hsa-metadata",
43  cl::desc("Dump AMDGPU HSA Metadata"));
45  "amdgpu-verify-hsa-metadata",
46  cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59  errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61  HSAMD::Metadata FromHSAMetadataString;
62  if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
63  errs() << "FAIL\n";
64  return;
65  }
66 
67  std::string ToHSAMetadataString;
68  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69  errs() << "FAIL\n";
70  return;
71  }
72 
73  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74  << '\n';
75  if (HSAMetadataString != ToHSAMetadataString) {
76  errs() << "Original input: " << HSAMetadataString << '\n'
77  << "Produced output: " << ToHSAMetadataString << '\n';
78  }
79 }
80 
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83  if (AccQual.empty())
85 
87  .Case("read_only", AccessQualifier::ReadOnly)
88  .Case("write_only", AccessQualifier::WriteOnly)
89  .Case("read_write", AccessQualifier::ReadWrite)
91 }
92 
94 MetadataStreamerV2::getAddressSpaceQualifier(
95  unsigned AddressSpace) const {
96  switch (AddressSpace) {
109  default:
111  }
112 }
113 
114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115  StringRef BaseTypeName) const {
116  if (TypeQual.find("pipe") != StringRef::npos)
117  return ValueKind::Pipe;
118 
119  return StringSwitch<ValueKind>(BaseTypeName)
120  .Case("image1d_t", ValueKind::Image)
121  .Case("image1d_array_t", ValueKind::Image)
122  .Case("image1d_buffer_t", ValueKind::Image)
123  .Case("image2d_t", ValueKind::Image)
124  .Case("image2d_array_t", ValueKind::Image)
125  .Case("image2d_array_depth_t", ValueKind::Image)
126  .Case("image2d_array_msaa_t", ValueKind::Image)
127  .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128  .Case("image2d_depth_t", ValueKind::Image)
129  .Case("image2d_msaa_t", ValueKind::Image)
130  .Case("image2d_msaa_depth_t", ValueKind::Image)
131  .Case("image3d_t", ValueKind::Image)
132  .Case("sampler_t", ValueKind::Sampler)
133  .Case("queue_t", ValueKind::Queue)
134  .Default(isa<PointerType>(Ty) ?
135  (Ty->getPointerAddressSpace() ==
140 }
141 
142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143  switch (Ty->getTypeID()) {
144  case Type::IntegerTyID: {
145  if (!Signed)
146  return (Twine('u') + getTypeName(Ty, true)).str();
147 
148  auto BitWidth = Ty->getIntegerBitWidth();
149  switch (BitWidth) {
150  case 8:
151  return "char";
152  case 16:
153  return "short";
154  case 32:
155  return "int";
156  case 64:
157  return "long";
158  default:
159  return (Twine('i') + Twine(BitWidth)).str();
160  }
161  }
162  case Type::HalfTyID:
163  return "half";
164  case Type::FloatTyID:
165  return "float";
166  case Type::DoubleTyID:
167  return "double";
168  case Type::FixedVectorTyID: {
169  auto VecTy = cast<FixedVectorType>(Ty);
170  auto ElTy = VecTy->getElementType();
171  auto NumElements = VecTy->getNumElements();
172  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173  }
174  default:
175  return "unknown";
176  }
177 }
178 
179 std::vector<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181  std::vector<uint32_t> Dims;
182  if (Node->getNumOperands() != 3)
183  return Dims;
184 
185  for (auto &Op : Node->operands())
186  Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187  return Dims;
188 }
189 
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192  const SIProgramInfo &ProgramInfo) const {
193  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
195  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196  const Function &F = MF.getFunction();
197 
198  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199  F.getCallingConv() == CallingConv::SPIR_KERNEL);
200 
201  Align MaxKernArgAlign;
202  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203  MaxKernArgAlign);
204  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
205  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
206  HSACodeProps.mKernargSegmentAlign =
207  std::max(MaxKernArgAlign, Align(4)).value();
208  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
209  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
210  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
211  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
212  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
213  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
214  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
215  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
216 
217  return HSACodeProps;
218 }
219 
220 Kernel::DebugProps::Metadata
221 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
222  const SIProgramInfo &ProgramInfo) const {
223  return HSAMD::Kernel::DebugProps::Metadata();
224 }
225 
226 void MetadataStreamerV2::emitVersion() {
227  auto &Version = HSAMetadata.mVersion;
228 
229  Version.push_back(VersionMajor);
230  Version.push_back(VersionMinor);
231 }
232 
233 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
234  auto &Printf = HSAMetadata.mPrintf;
235 
236  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
237  if (!Node)
238  return;
239 
240  for (auto Op : Node->operands())
241  if (Op->getNumOperands())
242  Printf.push_back(
243  std::string(cast<MDString>(Op->getOperand(0))->getString()));
244 }
245 
246 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
247  auto &Kernel = HSAMetadata.mKernels.back();
248 
249  // TODO: What about other languages?
250  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
251  if (!Node || !Node->getNumOperands())
252  return;
253  auto Op0 = Node->getOperand(0);
254  if (Op0->getNumOperands() <= 1)
255  return;
256 
257  Kernel.mLanguage = "OpenCL C";
258  Kernel.mLanguageVersion.push_back(
259  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
260  Kernel.mLanguageVersion.push_back(
261  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
262 }
263 
264 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
265  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
266 
267  if (auto Node = Func.getMetadata("reqd_work_group_size"))
268  Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
269  if (auto Node = Func.getMetadata("work_group_size_hint"))
270  Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
271  if (auto Node = Func.getMetadata("vec_type_hint")) {
272  Attrs.mVecTypeHint = getTypeName(
273  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
274  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
275  }
276  if (Func.hasFnAttribute("runtime-handle")) {
277  Attrs.mRuntimeHandle =
278  Func.getFnAttribute("runtime-handle").getValueAsString().str();
279  }
280 }
281 
282 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
283  for (auto &Arg : Func.args())
284  emitKernelArg(Arg);
285 
286  emitHiddenKernelArgs(Func);
287 }
288 
289 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
290  auto Func = Arg.getParent();
291  auto ArgNo = Arg.getArgNo();
292  const MDNode *Node;
293 
294  StringRef Name;
295  Node = Func->getMetadata("kernel_arg_name");
296  if (Node && ArgNo < Node->getNumOperands())
297  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
298  else if (Arg.hasName())
299  Name = Arg.getName();
300 
302  Node = Func->getMetadata("kernel_arg_type");
303  if (Node && ArgNo < Node->getNumOperands())
304  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
305 
306  StringRef BaseTypeName;
307  Node = Func->getMetadata("kernel_arg_base_type");
308  if (Node && ArgNo < Node->getNumOperands())
309  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
310 
312  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
313  Arg.hasNoAliasAttr()) {
314  AccQual = "read_only";
315  } else {
316  Node = Func->getMetadata("kernel_arg_access_qual");
317  if (Node && ArgNo < Node->getNumOperands())
318  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
319  }
320 
321  StringRef TypeQual;
322  Node = Func->getMetadata("kernel_arg_type_qual");
323  if (Node && ArgNo < Node->getNumOperands())
324  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
325 
326  const DataLayout &DL = Func->getParent()->getDataLayout();
327 
329  if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
330  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
331  // FIXME: Should report this for all address spaces
332  PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
333  PtrTy->getElementType());
334  }
335  }
336 
337  Type *ArgTy;
338  Align ArgAlign;
339  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
340 
341  emitKernelArg(DL, ArgTy, ArgAlign,
342  getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
343  TypeName, BaseTypeName, AccQual, TypeQual);
344 }
345 
346 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
347  Align Alignment, ValueKind ValueKind,
350  StringRef BaseTypeName,
351  StringRef AccQual, StringRef TypeQual) {
352  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
353  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
354 
355  Arg.mName = std::string(Name);
356  Arg.mTypeName = std::string(TypeName);
357  Arg.mSize = DL.getTypeAllocSize(Ty);
358  Arg.mAlign = Alignment.value();
359  Arg.mValueKind = ValueKind;
360  Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
361 
362  if (auto PtrTy = dyn_cast<PointerType>(Ty))
363  Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
364 
365  Arg.mAccQual = getAccessQualifier(AccQual);
366 
367  // TODO: Emit Arg.mActualAccQual.
368 
369  SmallVector<StringRef, 1> SplitTypeQuals;
370  TypeQual.split(SplitTypeQuals, " ", -1, false);
371  for (StringRef Key : SplitTypeQuals) {
372  auto P = StringSwitch<bool*>(Key)
373  .Case("const", &Arg.mIsConst)
374  .Case("restrict", &Arg.mIsRestrict)
375  .Case("volatile", &Arg.mIsVolatile)
376  .Case("pipe", &Arg.mIsPipe)
377  .Default(nullptr);
378  if (P)
379  *P = true;
380  }
381 }
382 
383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
384  int HiddenArgNumBytes =
385  getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
386 
387  if (!HiddenArgNumBytes)
388  return;
389 
390  auto &DL = Func.getParent()->getDataLayout();
391  auto Int64Ty = Type::getInt64Ty(Func.getContext());
392 
393  if (HiddenArgNumBytes >= 8)
394  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395  if (HiddenArgNumBytes >= 16)
396  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397  if (HiddenArgNumBytes >= 24)
398  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399 
400  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
402 
403  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
404  // "none" argument.
405  if (HiddenArgNumBytes >= 32) {
406  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
407  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
408  else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
409  // The printf runtime binding pass should have ensured that hostcall and
410  // printf are not used in the same module.
411  assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
412  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
413  } else
414  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
415  }
416 
417  // Emit "default queue" and "completion action" arguments if enqueue kernel is
418  // used, otherwise emit dummy "none" arguments.
419  if (HiddenArgNumBytes >= 48) {
420  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
421  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
422  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
423  } else {
424  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
425  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
426  }
427  }
428 
429  // Emit the pointer argument for multi-grid object.
430  if (HiddenArgNumBytes >= 56)
431  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
432 }
433 
435  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
436 }
437 
439  emitVersion();
440  emitPrintf(Mod);
441 }
442 
444  std::string HSAMetadataString;
445  if (toString(HSAMetadata, HSAMetadataString))
446  return;
447 
448  if (DumpHSAMetadata)
449  dump(HSAMetadataString);
450  if (VerifyHSAMetadata)
451  verify(HSAMetadataString);
452 }
453 
455  const SIProgramInfo &ProgramInfo) {
456  auto &Func = MF.getFunction();
457  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
458  return;
459 
460  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
461  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
462 
463  HSAMetadata.mKernels.push_back(Kernel::Metadata());
464  auto &Kernel = HSAMetadata.mKernels.back();
465 
466  Kernel.mName = std::string(Func.getName());
467  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
468  emitKernelLanguage(Func);
469  emitKernelAttrs(Func);
470  emitKernelArgs(Func);
471  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
472  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
473 }
474 
475 //===----------------------------------------------------------------------===//
476 // HSAMetadataStreamerV3
477 //===----------------------------------------------------------------------===//
478 
479 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
480  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
481 }
482 
483 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
484  errs() << "AMDGPU HSA Metadata Parser Test: ";
485 
486  msgpack::Document FromHSAMetadataString;
487 
488  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
489  errs() << "FAIL\n";
490  return;
491  }
492 
493  std::string ToHSAMetadataString;
494  raw_string_ostream StrOS(ToHSAMetadataString);
495  FromHSAMetadataString.toYAML(StrOS);
496 
497  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
498  if (HSAMetadataString != ToHSAMetadataString) {
499  errs() << "Original input: " << HSAMetadataString << '\n'
500  << "Produced output: " << StrOS.str() << '\n';
501  }
502 }
503 
505 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
507  .Case("read_only", StringRef("read_only"))
508  .Case("write_only", StringRef("write_only"))
509  .Case("read_write", StringRef("read_write"))
510  .Default(None);
511 }
512 
514 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
515  switch (AddressSpace) {
517  return StringRef("private");
519  return StringRef("global");
521  return StringRef("constant");
523  return StringRef("local");
525  return StringRef("generic");
527  return StringRef("region");
528  default:
529  return None;
530  }
531 }
532 
533 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
534  StringRef BaseTypeName) const {
535  if (TypeQual.find("pipe") != StringRef::npos)
536  return "pipe";
537 
538  return StringSwitch<StringRef>(BaseTypeName)
539  .Case("image1d_t", "image")
540  .Case("image1d_array_t", "image")
541  .Case("image1d_buffer_t", "image")
542  .Case("image2d_t", "image")
543  .Case("image2d_array_t", "image")
544  .Case("image2d_array_depth_t", "image")
545  .Case("image2d_array_msaa_t", "image")
546  .Case("image2d_array_msaa_depth_t", "image")
547  .Case("image2d_depth_t", "image")
548  .Case("image2d_msaa_t", "image")
549  .Case("image2d_msaa_depth_t", "image")
550  .Case("image3d_t", "image")
551  .Case("sampler_t", "sampler")
552  .Case("queue_t", "queue")
553  .Default(isa<PointerType>(Ty)
555  ? "dynamic_shared_pointer"
556  : "global_buffer")
557  : "by_value");
558 }
559 
560 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
561  switch (Ty->getTypeID()) {
562  case Type::IntegerTyID: {
563  if (!Signed)
564  return (Twine('u') + getTypeName(Ty, true)).str();
565 
566  auto BitWidth = Ty->getIntegerBitWidth();
567  switch (BitWidth) {
568  case 8:
569  return "char";
570  case 16:
571  return "short";
572  case 32:
573  return "int";
574  case 64:
575  return "long";
576  default:
577  return (Twine('i') + Twine(BitWidth)).str();
578  }
579  }
580  case Type::HalfTyID:
581  return "half";
582  case Type::FloatTyID:
583  return "float";
584  case Type::DoubleTyID:
585  return "double";
586  case Type::FixedVectorTyID: {
587  auto VecTy = cast<FixedVectorType>(Ty);
588  auto ElTy = VecTy->getElementType();
589  auto NumElements = VecTy->getNumElements();
590  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
591  }
592  default:
593  return "unknown";
594  }
595 }
596 
598 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
599  auto Dims = HSAMetadataDoc->getArrayNode();
600  if (Node->getNumOperands() != 3)
601  return Dims;
602 
603  for (auto &Op : Node->operands())
604  Dims.push_back(Dims.getDocument()->getNode(
605  uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
606  return Dims;
607 }
608 
609 void MetadataStreamerV3::emitVersion() {
610  auto Version = HSAMetadataDoc->getArrayNode();
611  Version.push_back(Version.getDocument()->getNode(VersionMajor));
612  Version.push_back(Version.getDocument()->getNode(VersionMinor));
613  getRootMetadata("amdhsa.version") = Version;
614 }
615 
616 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
617  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
618  if (!Node)
619  return;
620 
621  auto Printf = HSAMetadataDoc->getArrayNode();
622  for (auto Op : Node->operands())
623  if (Op->getNumOperands())
624  Printf.push_back(Printf.getDocument()->getNode(
625  cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
626  getRootMetadata("amdhsa.printf") = Printf;
627 }
628 
629 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
630  msgpack::MapDocNode Kern) {
631  // TODO: What about other languages?
632  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
633  if (!Node || !Node->getNumOperands())
634  return;
635  auto Op0 = Node->getOperand(0);
636  if (Op0->getNumOperands() <= 1)
637  return;
638 
639  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
640  auto LanguageVersion = Kern.getDocument()->getArrayNode();
641  LanguageVersion.push_back(Kern.getDocument()->getNode(
642  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
643  LanguageVersion.push_back(Kern.getDocument()->getNode(
644  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
645  Kern[".language_version"] = LanguageVersion;
646 }
647 
648 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
649  msgpack::MapDocNode Kern) {
650 
651  if (auto Node = Func.getMetadata("reqd_work_group_size"))
652  Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
653  if (auto Node = Func.getMetadata("work_group_size_hint"))
654  Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
655  if (auto Node = Func.getMetadata("vec_type_hint")) {
656  Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
657  getTypeName(
658  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
659  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
660  /*Copy=*/true);
661  }
662  if (Func.hasFnAttribute("runtime-handle")) {
663  Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
664  Func.getFnAttribute("runtime-handle").getValueAsString().str(),
665  /*Copy=*/true);
666  }
667 }
668 
669 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
670  msgpack::MapDocNode Kern) {
671  unsigned Offset = 0;
672  auto Args = HSAMetadataDoc->getArrayNode();
673  for (auto &Arg : Func.args())
674  emitKernelArg(Arg, Offset, Args);
675 
676  emitHiddenKernelArgs(Func, Offset, Args);
677 
678  Kern[".args"] = Args;
679 }
680 
681 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
683  auto Func = Arg.getParent();
684  auto ArgNo = Arg.getArgNo();
685  const MDNode *Node;
686 
687  StringRef Name;
688  Node = Func->getMetadata("kernel_arg_name");
689  if (Node && ArgNo < Node->getNumOperands())
690  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
691  else if (Arg.hasName())
692  Name = Arg.getName();
693 
695  Node = Func->getMetadata("kernel_arg_type");
696  if (Node && ArgNo < Node->getNumOperands())
697  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
698 
699  StringRef BaseTypeName;
700  Node = Func->getMetadata("kernel_arg_base_type");
701  if (Node && ArgNo < Node->getNumOperands())
702  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
703 
705  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
706  Arg.hasNoAliasAttr()) {
707  AccQual = "read_only";
708  } else {
709  Node = Func->getMetadata("kernel_arg_access_qual");
710  if (Node && ArgNo < Node->getNumOperands())
711  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
712  }
713 
714  StringRef TypeQual;
715  Node = Func->getMetadata("kernel_arg_type_qual");
716  if (Node && ArgNo < Node->getNumOperands())
717  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
718 
719  const DataLayout &DL = Func->getParent()->getDataLayout();
720 
722  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
723 
724  // FIXME: Need to distinguish in memory alignment from pointer alignment.
725  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
726  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
727  PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
728  PtrTy->getElementType());
729  }
730  }
731 
732  // There's no distinction between byval aggregates and raw aggregates.
733  Type *ArgTy;
734  Align ArgAlign;
735  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
736 
737  emitKernelArg(DL, ArgTy, ArgAlign,
738  getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
739  PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
740 }
741 
742 void MetadataStreamerV3::emitKernelArg(
743  const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
745  StringRef Name, StringRef TypeName, StringRef BaseTypeName,
746  StringRef AccQual, StringRef TypeQual) {
747  auto Arg = Args.getDocument()->getMapNode();
748 
749  if (!Name.empty())
750  Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
751  if (!TypeName.empty())
752  Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
753  auto Size = DL.getTypeAllocSize(Ty);
754  Arg[".size"] = Arg.getDocument()->getNode(Size);
755  Offset = alignTo(Offset, Alignment);
756  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
757  Offset += Size;
758  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
759  if (PointeeAlign)
760  Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
761 
762  if (auto PtrTy = dyn_cast<PointerType>(Ty))
763  if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
764  Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
765 
766  if (auto AQ = getAccessQualifier(AccQual))
767  Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
768 
769  // TODO: Emit Arg[".actual_access"].
770 
771  SmallVector<StringRef, 1> SplitTypeQuals;
772  TypeQual.split(SplitTypeQuals, " ", -1, false);
773  for (StringRef Key : SplitTypeQuals) {
774  if (Key == "const")
775  Arg[".is_const"] = Arg.getDocument()->getNode(true);
776  else if (Key == "restrict")
777  Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
778  else if (Key == "volatile")
779  Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
780  else if (Key == "pipe")
781  Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
782  }
783 
784  Args.push_back(Arg);
785 }
786 
787 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
788  unsigned &Offset,
790  int HiddenArgNumBytes =
791  getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
792 
793  if (!HiddenArgNumBytes)
794  return;
795 
796  auto &DL = Func.getParent()->getDataLayout();
797  auto Int64Ty = Type::getInt64Ty(Func.getContext());
798 
799  if (HiddenArgNumBytes >= 8)
800  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
801  Args);
802  if (HiddenArgNumBytes >= 16)
803  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
804  Args);
805  if (HiddenArgNumBytes >= 24)
806  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
807  Args);
808 
809  auto Int8PtrTy =
811 
812  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
813  // "none" argument.
814  if (HiddenArgNumBytes >= 32) {
815  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
816  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
817  Args);
818  else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
819  // The printf runtime binding pass should have ensured that hostcall and
820  // printf are not used in the same module.
821  assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
822  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
823  Args);
824  } else
825  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
826  }
827 
828  // Emit "default queue" and "completion action" arguments if enqueue kernel is
829  // used, otherwise emit dummy "none" arguments.
830  if (HiddenArgNumBytes >= 48) {
831  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
832  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
833  Args);
834  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
835  Args);
836  } else {
837  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
838  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
839  }
840  }
841 
842  // Emit the pointer argument for multi-grid object.
843  if (HiddenArgNumBytes >= 56)
844  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
845  Args);
846 }
847 
849 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
850  const SIProgramInfo &ProgramInfo) const {
851  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
853  const Function &F = MF.getFunction();
854 
855  auto Kern = HSAMetadataDoc->getMapNode();
856 
857  Align MaxKernArgAlign;
858  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
859  STM.getKernArgSegmentSize(F, MaxKernArgAlign));
860  Kern[".group_segment_fixed_size"] =
861  Kern.getDocument()->getNode(ProgramInfo.LDSSize);
862  Kern[".private_segment_fixed_size"] =
863  Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
864  Kern[".kernarg_segment_align"] =
865  Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
866  Kern[".wavefront_size"] =
867  Kern.getDocument()->getNode(STM.getWavefrontSize());
868  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
869  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
870  Kern[".max_flat_workgroup_size"] =
871  Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
872  Kern[".sgpr_spill_count"] =
873  Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
874  Kern[".vgpr_spill_count"] =
875  Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
876 
877  return Kern;
878 }
879 
881  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
882 }
883 
885  emitVersion();
886  emitPrintf(Mod);
887  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
888 }
889 
891  std::string HSAMetadataString;
892  raw_string_ostream StrOS(HSAMetadataString);
893  HSAMetadataDoc->toYAML(StrOS);
894 
895  if (DumpHSAMetadata)
896  dump(StrOS.str());
897  if (VerifyHSAMetadata)
898  verify(StrOS.str());
899 }
900 
902  const SIProgramInfo &ProgramInfo) {
903  auto &Func = MF.getFunction();
904  auto Kern = getHSAKernelProps(MF, ProgramInfo);
905 
906  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
907  Func.getCallingConv() == CallingConv::SPIR_KERNEL);
908 
909  auto Kernels =
910  getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
911 
912  {
913  Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
914  Kern[".symbol"] = Kern.getDocument()->getNode(
915  (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
916  emitKernelLanguage(Func, Kern);
917  emitKernelAttrs(Func, Kern);
918  emitKernelArgs(Func, Kern);
919  }
920 
921  Kernels.push_back(Kern);
922 }
923 
924 } // end namespace HSAMD
925 } // end namespace AMDGPU
926 } // end namespace llvm
const NoneType None
Definition: None.h:23
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:111
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:248
AMDGPU specific subclass of TargetSubtarget.
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
32-bit floating point type
Definition: Type.h:59
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:147
Document * getDocument() const
constexpr char PointeeAlign[]
Key for Kernel::Arg::Metadata::mPointeeAlign.
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
Address space for constant memory (VTX2).
Definition: AMDGPU.h:368
Address space for local memory.
Definition: AMDGPU.h:369
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
Metadata node.
Definition: Metadata.h:870
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:711
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
16-bit floating point type
Definition: Type.h:57
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:198
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
Function & getFunction()
Return the LLVM function that this machine code represents.
A DocNode that is an array.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Defines struct to track resource usage and hardware flags for kernels and entry functions.
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
Fixed width SIMD vector type.
Definition: Type.h:77
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
static constexpr size_t npos
Definition: StringRef.h:59
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
std::error_code fromString(std::string String, Metadata &HSAMetadata)
Converts String to HSAMetadata.
Key
PAL metadata keys.
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
DocNode getNode()
Create a nil node associated with this Document.
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
AMDGPU HSA Metadata Streamer.
Address space for private memory.
Definition: AMDGPU.h:370
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
Arbitrary bit width integers.
Definition: Type.h:72
#define P(N)
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
In-memory representation of kernel metadata.
ValueKind
Value kinds.
std::vector< uint32_t > mVersion
HSA metadata version. Required.
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:365
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:318
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
Address space for region memory. (GDS)
Definition: AMDGPU.h:366
uint64_t Align
static wasm::ValType getType(const TargetRegisterClass *RC)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:249
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:625
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
AccessQualifier
Access qualifiers.
std::vector< std::string > mPrintf
Printf metadata. Optional.
unsigned getWavefrontSize() const
uint64_t Offset
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:730
AddressSpace
Definition: NVPTXBaseInfo.h:21
constexpr char CodeProps[]
Key for Kernel::Metadata::mCodeProps.
The access may modify the value stored in memory.
constexpr char LanguageVersion[]
Key for Kernel::Metadata::mLanguageVersion.
Address space for flat memory.
Definition: AMDGPU.h:364
constexpr uint32_t VersionMinor
HSA metadata minor version.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
A DocNode that is a map.
constexpr char Kernels[]
Key for HSA::Metadata::mKernels.
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
std::error_code toString(Metadata HSAMetadata, std::string &String)
Converts HSAMetadata to String.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
constexpr char DebugProps[]
Key for Kernel::Metadata::mDebugProps.
AddressSpaceQualifier
Address space qualifiers.
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:211
constexpr char AccQual[]
Key for Kernel::Arg::Metadata::mAccQual.
64-bit floating point type
Definition: Type.h:60
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)=0
Emit HSA Metadata.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:607
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
std::vector< Kernel::Metadata > mKernels
Kernels metadata. Required.
const uint64_t Version
Definition: InstrProf.h:989
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr uint32_t VersionMajor
HSA metadata major version.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL