LLVM  14.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25  const DataLayout &DL) {
26  Type *Ty = Arg.getType();
27  MaybeAlign ArgAlign;
28  if (Arg.hasByRefAttr()) {
29  Ty = Arg.getParamByRefType();
30  ArgAlign = Arg.getParamAlign();
31  }
32 
33  if (!ArgAlign)
34  ArgAlign = DL.getABITypeAlign(Ty);
35 
36  return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
42  "amdgpu-dump-hsa-metadata",
43  cl::desc("Dump AMDGPU HSA Metadata"));
45  "amdgpu-verify-hsa-metadata",
46  cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59  errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61  HSAMD::Metadata FromHSAMetadataString;
62  if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63  errs() << "FAIL\n";
64  return;
65  }
66 
67  std::string ToHSAMetadataString;
68  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69  errs() << "FAIL\n";
70  return;
71  }
72 
73  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74  << '\n';
75  if (HSAMetadataString != ToHSAMetadataString) {
76  errs() << "Original input: " << HSAMetadataString << '\n'
77  << "Produced output: " << ToHSAMetadataString << '\n';
78  }
79 }
80 
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83  if (AccQual.empty())
85 
87  .Case("read_only", AccessQualifier::ReadOnly)
88  .Case("write_only", AccessQualifier::WriteOnly)
89  .Case("read_write", AccessQualifier::ReadWrite)
91 }
92 
94 MetadataStreamerV2::getAddressSpaceQualifier(
95  unsigned AddressSpace) const {
96  switch (AddressSpace) {
109  default:
111  }
112 }
113 
114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115  StringRef BaseTypeName) const {
116  if (TypeQual.contains("pipe"))
117  return ValueKind::Pipe;
118 
119  return StringSwitch<ValueKind>(BaseTypeName)
120  .Case("image1d_t", ValueKind::Image)
121  .Case("image1d_array_t", ValueKind::Image)
122  .Case("image1d_buffer_t", ValueKind::Image)
123  .Case("image2d_t", ValueKind::Image)
124  .Case("image2d_array_t", ValueKind::Image)
125  .Case("image2d_array_depth_t", ValueKind::Image)
126  .Case("image2d_array_msaa_t", ValueKind::Image)
127  .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128  .Case("image2d_depth_t", ValueKind::Image)
129  .Case("image2d_msaa_t", ValueKind::Image)
130  .Case("image2d_msaa_depth_t", ValueKind::Image)
131  .Case("image3d_t", ValueKind::Image)
132  .Case("sampler_t", ValueKind::Sampler)
133  .Case("queue_t", ValueKind::Queue)
134  .Default(isa<PointerType>(Ty) ?
135  (Ty->getPointerAddressSpace() ==
140 }
141 
142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143  switch (Ty->getTypeID()) {
144  case Type::IntegerTyID: {
145  if (!Signed)
146  return (Twine('u') + getTypeName(Ty, true)).str();
147 
148  auto BitWidth = Ty->getIntegerBitWidth();
149  switch (BitWidth) {
150  case 8:
151  return "char";
152  case 16:
153  return "short";
154  case 32:
155  return "int";
156  case 64:
157  return "long";
158  default:
159  return (Twine('i') + Twine(BitWidth)).str();
160  }
161  }
162  case Type::HalfTyID:
163  return "half";
164  case Type::FloatTyID:
165  return "float";
166  case Type::DoubleTyID:
167  return "double";
168  case Type::FixedVectorTyID: {
169  auto VecTy = cast<FixedVectorType>(Ty);
170  auto ElTy = VecTy->getElementType();
171  auto NumElements = VecTy->getNumElements();
172  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173  }
174  default:
175  return "unknown";
176  }
177 }
178 
179 std::vector<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181  std::vector<uint32_t> Dims;
182  if (Node->getNumOperands() != 3)
183  return Dims;
184 
185  for (auto &Op : Node->operands())
186  Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187  return Dims;
188 }
189 
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192  const SIProgramInfo &ProgramInfo) const {
193  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
195  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196  const Function &F = MF.getFunction();
197 
198  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199  F.getCallingConv() == CallingConv::SPIR_KERNEL);
200 
201  Align MaxKernArgAlign;
202  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203  MaxKernArgAlign);
204  HSACodeProps.mKernargSegmentAlign =
205  std::max(MaxKernArgAlign, Align(4)).value();
206 
207  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
208  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
209  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
210  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
211  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
212  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
213  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
214  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
215  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
216  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
217 
218  return HSACodeProps;
219 }
220 
221 Kernel::DebugProps::Metadata
222 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
223  const SIProgramInfo &ProgramInfo) const {
224  return HSAMD::Kernel::DebugProps::Metadata();
225 }
226 
227 void MetadataStreamerV2::emitVersion() {
228  auto &Version = HSAMetadata.mVersion;
229 
230  Version.push_back(VersionMajorV2);
231  Version.push_back(VersionMinorV2);
232 }
233 
234 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
235  auto &Printf = HSAMetadata.mPrintf;
236 
237  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
238  if (!Node)
239  return;
240 
241  for (auto Op : Node->operands())
242  if (Op->getNumOperands())
243  Printf.push_back(
244  std::string(cast<MDString>(Op->getOperand(0))->getString()));
245 }
246 
247 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
248  auto &Kernel = HSAMetadata.mKernels.back();
249 
250  // TODO: What about other languages?
251  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
252  if (!Node || !Node->getNumOperands())
253  return;
254  auto Op0 = Node->getOperand(0);
255  if (Op0->getNumOperands() <= 1)
256  return;
257 
258  Kernel.mLanguage = "OpenCL C";
259  Kernel.mLanguageVersion.push_back(
260  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
261  Kernel.mLanguageVersion.push_back(
262  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
263 }
264 
265 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
266  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
267 
268  if (auto Node = Func.getMetadata("reqd_work_group_size"))
269  Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
270  if (auto Node = Func.getMetadata("work_group_size_hint"))
271  Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
272  if (auto Node = Func.getMetadata("vec_type_hint")) {
273  Attrs.mVecTypeHint = getTypeName(
274  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
275  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
276  }
277  if (Func.hasFnAttribute("runtime-handle")) {
278  Attrs.mRuntimeHandle =
279  Func.getFnAttribute("runtime-handle").getValueAsString().str();
280  }
281 }
282 
283 void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284  const GCNSubtarget &ST) {
285  for (auto &Arg : Func.args())
286  emitKernelArg(Arg);
287 
288  emitHiddenKernelArgs(Func, ST);
289 }
290 
291 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
292  auto Func = Arg.getParent();
293  auto ArgNo = Arg.getArgNo();
294  const MDNode *Node;
295 
296  StringRef Name;
297  Node = Func->getMetadata("kernel_arg_name");
298  if (Node && ArgNo < Node->getNumOperands())
299  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
300  else if (Arg.hasName())
301  Name = Arg.getName();
302 
304  Node = Func->getMetadata("kernel_arg_type");
305  if (Node && ArgNo < Node->getNumOperands())
306  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
307 
308  StringRef BaseTypeName;
309  Node = Func->getMetadata("kernel_arg_base_type");
310  if (Node && ArgNo < Node->getNumOperands())
311  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 
314  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
315  Arg.hasNoAliasAttr()) {
316  AccQual = "read_only";
317  } else {
318  Node = Func->getMetadata("kernel_arg_access_qual");
319  if (Node && ArgNo < Node->getNumOperands())
320  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321  }
322 
323  StringRef TypeQual;
324  Node = Func->getMetadata("kernel_arg_type_qual");
325  if (Node && ArgNo < Node->getNumOperands())
326  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
327 
328  const DataLayout &DL = Func->getParent()->getDataLayout();
329 
331  if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
332  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
333  // FIXME: Should report this for all address spaces
334  PointeeAlign = Arg.getParamAlign().valueOrOne();
335  }
336  }
337 
338  Type *ArgTy;
339  Align ArgAlign;
340  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
341 
342  emitKernelArg(DL, ArgTy, ArgAlign,
343  getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
344  TypeName, BaseTypeName, AccQual, TypeQual);
345 }
346 
347 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
348  Align Alignment, ValueKind ValueKind,
351  StringRef BaseTypeName,
352  StringRef AccQual, StringRef TypeQual) {
353  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
354  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
355 
356  Arg.mName = std::string(Name);
357  Arg.mTypeName = std::string(TypeName);
358  Arg.mSize = DL.getTypeAllocSize(Ty);
359  Arg.mAlign = Alignment.value();
360  Arg.mValueKind = ValueKind;
361  Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
362 
363  if (auto PtrTy = dyn_cast<PointerType>(Ty))
364  Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
365 
366  Arg.mAccQual = getAccessQualifier(AccQual);
367 
368  // TODO: Emit Arg.mActualAccQual.
369 
370  SmallVector<StringRef, 1> SplitTypeQuals;
371  TypeQual.split(SplitTypeQuals, " ", -1, false);
372  for (StringRef Key : SplitTypeQuals) {
373  auto P = StringSwitch<bool*>(Key)
374  .Case("const", &Arg.mIsConst)
375  .Case("restrict", &Arg.mIsRestrict)
376  .Case("volatile", &Arg.mIsVolatile)
377  .Case("pipe", &Arg.mIsPipe)
378  .Default(nullptr);
379  if (P)
380  *P = true;
381  }
382 }
383 
384 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
385  const GCNSubtarget &ST) {
386  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
387  if (!HiddenArgNumBytes)
388  return;
389 
390  auto &DL = Func.getParent()->getDataLayout();
391  auto Int64Ty = Type::getInt64Ty(Func.getContext());
392 
393  if (HiddenArgNumBytes >= 8)
394  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395  if (HiddenArgNumBytes >= 16)
396  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397  if (HiddenArgNumBytes >= 24)
398  emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399 
400  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
402 
403  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
404  // "none" argument.
405  if (HiddenArgNumBytes >= 32) {
406  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
407  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
408  else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
409  // The printf runtime binding pass should have ensured that hostcall and
410  // printf are not used in the same module.
411  assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
412  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
413  } else
414  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
415  }
416 
417  // Emit "default queue" and "completion action" arguments if enqueue kernel is
418  // used, otherwise emit dummy "none" arguments.
419  if (HiddenArgNumBytes >= 48) {
420  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
421  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
422  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
423  } else {
424  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
425  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
426  }
427  }
428 
429  // Emit the pointer argument for multi-grid object.
430  if (HiddenArgNumBytes >= 56)
431  emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
432 }
433 
435  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
436 }
437 
439  const IsaInfo::AMDGPUTargetID &TargetID) {
440  emitVersion();
441  emitPrintf(Mod);
442 }
443 
445  std::string HSAMetadataString;
446  if (toString(HSAMetadata, HSAMetadataString))
447  return;
448 
449  if (DumpHSAMetadata)
450  dump(HSAMetadataString);
451  if (VerifyHSAMetadata)
452  verify(HSAMetadataString);
453 }
454 
456  const SIProgramInfo &ProgramInfo) {
457  auto &Func = MF.getFunction();
458  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
459  return;
460 
461  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
462  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
463 
464  HSAMetadata.mKernels.push_back(Kernel::Metadata());
465  auto &Kernel = HSAMetadata.mKernels.back();
466 
467  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
468  Kernel.mName = std::string(Func.getName());
469  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
470  emitKernelLanguage(Func);
471  emitKernelAttrs(Func);
472  emitKernelArgs(Func, ST);
473  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
474  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
475 }
476 
477 //===----------------------------------------------------------------------===//
478 // HSAMetadataStreamerV3
479 //===----------------------------------------------------------------------===//
480 
481 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
482  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
483 }
484 
485 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
486  errs() << "AMDGPU HSA Metadata Parser Test: ";
487 
488  msgpack::Document FromHSAMetadataString;
489 
490  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
491  errs() << "FAIL\n";
492  return;
493  }
494 
495  std::string ToHSAMetadataString;
496  raw_string_ostream StrOS(ToHSAMetadataString);
497  FromHSAMetadataString.toYAML(StrOS);
498 
499  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
500  if (HSAMetadataString != ToHSAMetadataString) {
501  errs() << "Original input: " << HSAMetadataString << '\n'
502  << "Produced output: " << StrOS.str() << '\n';
503  }
504 }
505 
509  .Case("read_only", StringRef("read_only"))
510  .Case("write_only", StringRef("write_only"))
511  .Case("read_write", StringRef("read_write"))
512  .Default(None);
513 }
514 
517  switch (AddressSpace) {
519  return StringRef("private");
521  return StringRef("global");
523  return StringRef("constant");
525  return StringRef("local");
527  return StringRef("generic");
529  return StringRef("region");
530  default:
531  return None;
532  }
533 }
534 
536  StringRef BaseTypeName) const {
537  if (TypeQual.contains("pipe"))
538  return "pipe";
539 
540  return StringSwitch<StringRef>(BaseTypeName)
541  .Case("image1d_t", "image")
542  .Case("image1d_array_t", "image")
543  .Case("image1d_buffer_t", "image")
544  .Case("image2d_t", "image")
545  .Case("image2d_array_t", "image")
546  .Case("image2d_array_depth_t", "image")
547  .Case("image2d_array_msaa_t", "image")
548  .Case("image2d_array_msaa_depth_t", "image")
549  .Case("image2d_depth_t", "image")
550  .Case("image2d_msaa_t", "image")
551  .Case("image2d_msaa_depth_t", "image")
552  .Case("image3d_t", "image")
553  .Case("sampler_t", "sampler")
554  .Case("queue_t", "queue")
555  .Default(isa<PointerType>(Ty)
557  ? "dynamic_shared_pointer"
558  : "global_buffer")
559  : "by_value");
560 }
561 
562 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
563  switch (Ty->getTypeID()) {
564  case Type::IntegerTyID: {
565  if (!Signed)
566  return (Twine('u') + getTypeName(Ty, true)).str();
567 
568  auto BitWidth = Ty->getIntegerBitWidth();
569  switch (BitWidth) {
570  case 8:
571  return "char";
572  case 16:
573  return "short";
574  case 32:
575  return "int";
576  case 64:
577  return "long";
578  default:
579  return (Twine('i') + Twine(BitWidth)).str();
580  }
581  }
582  case Type::HalfTyID:
583  return "half";
584  case Type::FloatTyID:
585  return "float";
586  case Type::DoubleTyID:
587  return "double";
588  case Type::FixedVectorTyID: {
589  auto VecTy = cast<FixedVectorType>(Ty);
590  auto ElTy = VecTy->getElementType();
591  auto NumElements = VecTy->getNumElements();
592  return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
593  }
594  default:
595  return "unknown";
596  }
597 }
598 
601  auto Dims = HSAMetadataDoc->getArrayNode();
602  if (Node->getNumOperands() != 3)
603  return Dims;
604 
605  for (auto &Op : Node->operands())
606  Dims.push_back(Dims.getDocument()->getNode(
607  uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
608  return Dims;
609 }
610 
612  auto Version = HSAMetadataDoc->getArrayNode();
613  Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
614  Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
615  getRootMetadata("amdhsa.version") = Version;
616 }
617 
619  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
620  if (!Node)
621  return;
622 
623  auto Printf = HSAMetadataDoc->getArrayNode();
624  for (auto Op : Node->operands())
625  if (Op->getNumOperands())
626  Printf.push_back(Printf.getDocument()->getNode(
627  cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
628  getRootMetadata("amdhsa.printf") = Printf;
629 }
630 
632  msgpack::MapDocNode Kern) {
633  // TODO: What about other languages?
634  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
635  if (!Node || !Node->getNumOperands())
636  return;
637  auto Op0 = Node->getOperand(0);
638  if (Op0->getNumOperands() <= 1)
639  return;
640 
641  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
642  auto LanguageVersion = Kern.getDocument()->getArrayNode();
643  LanguageVersion.push_back(Kern.getDocument()->getNode(
644  mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
645  LanguageVersion.push_back(Kern.getDocument()->getNode(
646  mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
647  Kern[".language_version"] = LanguageVersion;
648 }
649 
651  msgpack::MapDocNode Kern) {
652 
653  if (auto Node = Func.getMetadata("reqd_work_group_size"))
654  Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
655  if (auto Node = Func.getMetadata("work_group_size_hint"))
656  Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
657  if (auto Node = Func.getMetadata("vec_type_hint")) {
658  Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
659  getTypeName(
660  cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
661  mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
662  /*Copy=*/true);
663  }
664  if (Func.hasFnAttribute("runtime-handle")) {
665  Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
666  Func.getFnAttribute("runtime-handle").getValueAsString().str(),
667  /*Copy=*/true);
668  }
669  if (Func.hasFnAttribute("device-init"))
670  Kern[".kind"] = Kern.getDocument()->getNode("init");
671  else if (Func.hasFnAttribute("device-fini"))
672  Kern[".kind"] = Kern.getDocument()->getNode("fini");
673 }
674 
676  const GCNSubtarget &ST,
677  msgpack::MapDocNode Kern) {
678  unsigned Offset = 0;
679  auto Args = HSAMetadataDoc->getArrayNode();
680  for (auto &Arg : Func.args())
682 
684 
685  Kern[".args"] = Args;
686 }
687 
690  auto Func = Arg.getParent();
691  auto ArgNo = Arg.getArgNo();
692  const MDNode *Node;
693 
694  StringRef Name;
695  Node = Func->getMetadata("kernel_arg_name");
696  if (Node && ArgNo < Node->getNumOperands())
697  Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
698  else if (Arg.hasName())
699  Name = Arg.getName();
700 
702  Node = Func->getMetadata("kernel_arg_type");
703  if (Node && ArgNo < Node->getNumOperands())
704  TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
705 
706  StringRef BaseTypeName;
707  Node = Func->getMetadata("kernel_arg_base_type");
708  if (Node && ArgNo < Node->getNumOperands())
709  BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
710 
712  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
713  Arg.hasNoAliasAttr()) {
714  AccQual = "read_only";
715  } else {
716  Node = Func->getMetadata("kernel_arg_access_qual");
717  if (Node && ArgNo < Node->getNumOperands())
718  AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
719  }
720 
721  StringRef TypeQual;
722  Node = Func->getMetadata("kernel_arg_type_qual");
723  if (Node && ArgNo < Node->getNumOperands())
724  TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
725 
726  const DataLayout &DL = Func->getParent()->getDataLayout();
727 
729  Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
730 
731  // FIXME: Need to distinguish in memory alignment from pointer alignment.
732  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
733  if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
734  PointeeAlign = Arg.getParamAlign().valueOrOne();
735  }
736 
737  // There's no distinction between byval aggregates and raw aggregates.
738  Type *ArgTy;
739  Align ArgAlign;
740  std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
741 
742  emitKernelArg(DL, ArgTy, ArgAlign,
743  getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
744  PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
745 }
746 
748  const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
750  StringRef Name, StringRef TypeName, StringRef BaseTypeName,
751  StringRef AccQual, StringRef TypeQual) {
752  auto Arg = Args.getDocument()->getMapNode();
753 
754  if (!Name.empty())
755  Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
756  if (!TypeName.empty())
757  Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
758  auto Size = DL.getTypeAllocSize(Ty);
759  Arg[".size"] = Arg.getDocument()->getNode(Size);
760  Offset = alignTo(Offset, Alignment);
761  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
762  Offset += Size;
763  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
764  if (PointeeAlign)
765  Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
766 
767  if (auto PtrTy = dyn_cast<PointerType>(Ty))
768  if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
769  Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
770 
771  if (auto AQ = getAccessQualifier(AccQual))
772  Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
773 
774  // TODO: Emit Arg[".actual_access"].
775 
776  SmallVector<StringRef, 1> SplitTypeQuals;
777  TypeQual.split(SplitTypeQuals, " ", -1, false);
778  for (StringRef Key : SplitTypeQuals) {
779  if (Key == "const")
780  Arg[".is_const"] = Arg.getDocument()->getNode(true);
781  else if (Key == "restrict")
782  Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
783  else if (Key == "volatile")
784  Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
785  else if (Key == "pipe")
786  Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
787  }
788 
789  Args.push_back(Arg);
790 }
791 
793  const GCNSubtarget &ST,
794  unsigned &Offset,
796  unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
797  if (!HiddenArgNumBytes)
798  return;
799 
800  const Module *M = Func.getParent();
801  auto &DL = M->getDataLayout();
802  auto Int64Ty = Type::getInt64Ty(Func.getContext());
803 
804  if (HiddenArgNumBytes >= 8)
805  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
806  Args);
807  if (HiddenArgNumBytes >= 16)
808  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
809  Args);
810  if (HiddenArgNumBytes >= 24)
811  emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
812  Args);
813 
814  auto Int8PtrTy =
815  Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
816 
817  // Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
818  // if "hostcall" module flag is set, otherwise emit dummy "none" argument.
819  if (HiddenArgNumBytes >= 32) {
820  if (M->getNamedMetadata("llvm.printf.fmts"))
821  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
822  Args);
823  else if (M->getModuleFlag("amdgpu_hostcall")) {
824  // The printf runtime binding pass should have ensured that hostcall and
825  // printf are not used in the same module.
826  assert(!M->getNamedMetadata("llvm.printf.fmts"));
827  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
828  Args);
829  } else
830  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
831  }
832 
833  // Emit "default queue" and "completion action" arguments if enqueue kernel is
834  // used, otherwise emit dummy "none" arguments.
835  if (HiddenArgNumBytes >= 48) {
836  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
837  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
838  Args);
839  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
840  Args);
841  } else {
842  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
844  }
845  }
846 
847  // Emit the pointer argument for multi-grid object.
848  if (HiddenArgNumBytes >= 56)
849  emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
850  Args);
851 }
852 
855  const SIProgramInfo &ProgramInfo) const {
856  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
858  const Function &F = MF.getFunction();
859 
860  auto Kern = HSAMetadataDoc->getMapNode();
861 
862  Align MaxKernArgAlign;
863  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
864  STM.getKernArgSegmentSize(F, MaxKernArgAlign));
865  Kern[".group_segment_fixed_size"] =
866  Kern.getDocument()->getNode(ProgramInfo.LDSSize);
867  Kern[".private_segment_fixed_size"] =
868  Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
869 
870  // FIXME: The metadata treats the minimum as 16?
871  Kern[".kernarg_segment_align"] =
872  Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
873  Kern[".wavefront_size"] =
874  Kern.getDocument()->getNode(STM.getWavefrontSize());
875  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
876  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
877  Kern[".max_flat_workgroup_size"] =
878  Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
879  Kern[".sgpr_spill_count"] =
880  Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
881  Kern[".vgpr_spill_count"] =
882  Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
883 
884  return Kern;
885 }
886 
888  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
889 }
890 
892  const IsaInfo::AMDGPUTargetID &TargetID) {
893  emitVersion();
894  emitPrintf(Mod);
895  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
896 }
897 
899  std::string HSAMetadataString;
900  raw_string_ostream StrOS(HSAMetadataString);
901  HSAMetadataDoc->toYAML(StrOS);
902 
903  if (DumpHSAMetadata)
904  dump(StrOS.str());
905  if (VerifyHSAMetadata)
906  verify(StrOS.str());
907 }
908 
910  const SIProgramInfo &ProgramInfo) {
911  auto &Func = MF.getFunction();
912  auto Kern = getHSAKernelProps(MF, ProgramInfo);
913  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
914 
915  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
916  Func.getCallingConv() == CallingConv::SPIR_KERNEL);
917 
918  auto Kernels =
919  getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
920 
921  {
922  Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
923  Kern[".symbol"] = Kern.getDocument()->getNode(
924  (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
925  emitKernelLanguage(Func, Kern);
926  emitKernelAttrs(Func, Kern);
927  emitKernelArgs(Func, ST, Kern);
928  }
929 
930  Kernels.push_back(Kern);
931 }
932 
933 //===----------------------------------------------------------------------===//
934 // HSAMetadataStreamerV4
935 //===----------------------------------------------------------------------===//
936 
937 void MetadataStreamerV4::emitVersion() {
938  auto Version = HSAMetadataDoc->getArrayNode();
939  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
940  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
941  getRootMetadata("amdhsa.version") = Version;
942 }
943 
944 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
945  getRootMetadata("amdhsa.target") =
946  HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
947 }
948 
950  const IsaInfo::AMDGPUTargetID &TargetID) {
951  emitVersion();
952  emitTargetID(TargetID);
953  emitPrintf(Mod);
954  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
955 }
956 
957 } // end namespace HSAMD
958 } // end namespace AMDGPU
959 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4645
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:909
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AMDGPU::HSAMD::AccessQualifier::Unknown
@ Unknown
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArgs
void emitKernelArgs(const Function &Func, const GCNSubtarget &ST, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:675
llvm::msgpack::DocNode::getDocument
Document * getDocument() const
Definition: MsgPackDocument.h:80
SIMachineFunctionInfo.h
llvm::AMDGPU::HSAMD::Metadata::mVersion
std::vector< uint32_t > mVersion
HSA metadata version. Required.
Definition: AMDGPUMetadata.h:433
llvm::Function
Definition: Function.h:62
llvm::AMDGPU::HSAMD::MetadataStreamerV2::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:444
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:182
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitHiddenKernelArgs
void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST, unsigned &Offset, msgpack::ArrayDocNode Args)
Definition: AMDGPUHSAMetadataStreamer.cpp:792
llvm::AMDGPU::HSAMD::ValueKind::ByValue
@ ByValue
AMDGPUHSAMetadataStreamer.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::SmallVector< StringRef, 1 >
llvm::AMDGPU::HSAMD::MetadataStreamerV3::HSAMetadataDoc
std::unique_ptr< msgpack::Document > HSAMetadataDoc
Definition: AMDGPUHSAMetadataStreamer.h:61
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:736
llvm::SIProgramInfo::NumSGPR
uint32_t NumSGPR
Definition: SIProgramInfo.h:51
llvm::AMDGPU::HSAMD::VersionMajorV4
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
Definition: AMDGPUMetadata.h:43
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Region
@ Region
llvm::AMDGPU::HSAMD::MetadataStreamerV3::dump
void dump(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:481
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:135
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::Optional
Definition: APInt.h:33
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::SIProgramInfo::NumVGPR
uint32_t NumVGPR
Definition: SIProgramInfo.h:46
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getWorkGroupDimensions
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
Definition: AMDGPUHSAMetadataStreamer.cpp:600
llvm::AMDGPU::HSAMD::MetadataStreamerV4::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:949
llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition: SIProgramInfo.h:52
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:688
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPU::HSAMD::VersionMajorV2
constexpr uint32_t VersionMajorV2
HSA metadata major version for code object V2.
Definition: AMDGPUMetadata.h:33
llvm::AMDGPU::HSAMD::AccessQualifier
AccessQualifier
Access qualifiers.
Definition: AMDGPUMetadata.h:53
llvm::AMDGPU::HSAMD::Kernel::Key::DebugProps
constexpr char DebugProps[]
Key for Kernel::Metadata::mDebugProps.
Definition: AMDGPUMetadata.h:393
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::AMDGPU::HSAMD::fromString
std::error_code fromString(StringRef String, Metadata &HSAMetadata)
Converts String to HSAMetadata.
Definition: AMDGPUMetadata.cpp:213
llvm::AMDGPU::HSAMD::Key::Kernels
constexpr char Kernels[]
Key for HSA::Metadata::mKernels.
Definition: AMDGPUMetadata.h:427
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelLanguage
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:631
llvm::AMDGPU::HSAMD::ValueKind
ValueKind
Value kinds.
Definition: AMDGPUMetadata.h:73
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:86
llvm::AMDGPUTargetStreamer::EmitHSAMetadata
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)=0
Emit HSA Metadata.
llvm::AMDGPU::HSAMD::ValueKind::HiddenCompletionAction
@ HiddenCompletionAction
GCNSubtarget.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
llvm::StringRef::split
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:749
llvm::AMDGPU::HSAMD::VersionMinorV2
constexpr uint32_t VersionMinorV2
HSA metadata minor version for code object V2.
Definition: AMDGPUMetadata.h:35
llvm::SIProgramInfo::ScratchSize
uint64_t ScratchSize
Definition: SIProgramInfo.h:37
llvm::msgpack::Document::getArrayNode
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
Definition: MsgPackDocument.h:380
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::AccQual
constexpr char AccQual[]
Key for Kernel::Arg::Metadata::mAccQual.
Definition: AMDGPUMetadata.h:186
llvm::AMDGPU::HSAMD::Kernel::Key::CodeProps
constexpr char CodeProps[]
Key for Kernel::Metadata::mCodeProps.
Definition: AMDGPUMetadata.h:391
llvm::AMDGPU::HSAMD::MetadataStreamerV3::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:891
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetY
@ HiddenGlobalOffsetY
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::StringRef::contains
LLVM_NODISCARD bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:462
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetZ
@ HiddenGlobalOffsetZ
llvm::msgpack::Document
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
Definition: MsgPackDocument.h:272
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getAddressSpaceQualifier
Optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
Definition: AMDGPUHSAMetadataStreamer.cpp:516
SIProgramInfo.h
getArgumentTypeAlign
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
Definition: AMDGPUHSAMetadataStreamer.cpp:24
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::SIProgramInfo::DynamicCallStack
bool DynamicCallStack
Definition: SIProgramInfo.h:66
llvm::AMDGPU::HSAMD::MetadataStreamerV3::verify
void verify(StringRef HSAMetadataString) const
Definition: AMDGPUHSAMetadataStreamer.cpp:485
llvm::AMDGPU::HSAMD::ValueKind::GlobalBuffer
@ GlobalBuffer
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::None
const NoneType None
Definition: None.h:23
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::msgpack::Document::fromYAML
bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
Definition: MsgPackDocumentYAML.cpp:242
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:196
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Global
@ Global
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getRootMetadata
msgpack::DocNode & getRootMetadata(StringRef Key)
Definition: AMDGPUHSAMetadataStreamer.h:106
llvm::AMDGPU::HSAMD::AccessQualifier::WriteOnly
@ WriteOnly
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::cl::opt< bool >
llvm::AMDGPU::HSAMD::ValueKind::Image
@ Image
llvm::msgpack::MapDocNode
A DocNode that is a map.
Definition: MsgPackDocument.h:219
llvm::AMDGPU::HSAMD::Metadata::mPrintf
std::vector< std::string > mPrintf
Printf metadata. Optional.
Definition: AMDGPUMetadata.h:435
AMDGPUTargetStreamer.h
uint64_t
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Local
@ Local
llvm::AMDGPU::HSAMD::AccessQualifier::ReadOnly
@ ReadOnly
llvm::AMDGPU::HSAMD::AccessQualifier::ReadWrite
@ ReadWrite
llvm::omp::Kernel
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPU::HSAMD::Kernel::Metadata
In-memory representation of kernel metadata.
Definition: AMDGPUMetadata.h:397
llvm::AMDGPU::HSAMD::ValueKind::HiddenDefaultQueue
@ HiddenDefaultQueue
llvm::AMDGPU::HSAMD::ValueKind::HiddenNone
@ HiddenNone
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelAttrs
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern)
Definition: AMDGPUHSAMetadataStreamer.cpp:650
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::PointeeAlign
constexpr char PointeeAlign[]
Key for Kernel::Arg::Metadata::mPointeeAlign.
Definition: AMDGPUMetadata.h:182
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Generic
@ Generic
llvm::AMDGPU::HSAMD::ValueKind::Pipe
@ Pipe
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::msgpack::Document::getNode
DocNode getNode()
Create a nil node associated with this Document.
Definition: MsgPackDocument.h:308
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Unknown
@ Unknown
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Constant
@ Constant
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::AMDGPU::HSAMD::ValueKind::HiddenPrintfBuffer
@ HiddenPrintfBuffer
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArg
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
Definition: AMDGPUHSAMetadataStreamer.cpp:688
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::AMDGPU::HSAMD::VersionMajorV3
constexpr uint32_t VersionMajorV3
HSA metadata major version for code object V3.
Definition: AMDGPUMetadata.h:38
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AMDGPU::HSAMD::ValueKind::Sampler
@ Sampler
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:887
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::HSAMD::Kernel::Key::LanguageVersion
constexpr char LanguageVersion[]
Key for Kernel::Metadata::mLanguageVersion.
Definition: AMDGPUMetadata.h:385
llvm::AMDGPU::HSAMD::Metadata::mKernels
std::vector< Kernel::Metadata > mKernels
Kernels metadata. Required.
Definition: AMDGPUMetadata.h:437
llvm::AMDGPUTargetStreamer
Definition: AMDGPUTargetStreamer.h:34
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::AMDGPU::HSAMD::ValueKind::HiddenMultiGridSyncArg
@ HiddenMultiGridSyncArg
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPU::HSAMD::AddressSpaceQualifier
AddressSpaceQualifier
Address space qualifiers.
Definition: AMDGPUMetadata.h:62
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitPrintf
void emitPrintf(const Module &Mod)
Definition: AMDGPUHSAMetadataStreamer.cpp:618
llvm::AMDGPU::HSAMD::ValueKind::DynamicSharedPointer
@ DynamicSharedPointer
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
llvm::AMDGPU::HSAMD::MetadataStreamerV3::end
void end() override
Definition: AMDGPUHSAMetadataStreamer.cpp:898
llvm::AMDGPU::HSAMD::ValueKind::HiddenHostcallBuffer
@ HiddenHostcallBuffer
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::toString
std::string toString() const
Definition: AMDGPUBaseInfo.cpp:414
llvm::msgpack::Document::toYAML
void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
Definition: MsgPackDocumentYAML.cpp:236
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AMDGPU::HSAMD::MetadataStreamerV2::emitTo
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
Definition: AMDGPUHSAMetadataStreamer.cpp:434
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:170
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::VerifyHSAMetadata
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
llvm::msgpack::DocNode::getArray
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Definition: MsgPackDocument.h:129
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getHSAKernelProps
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const
Definition: AMDGPUHSAMetadataStreamer.cpp:854
llvm::DumpHSAMetadata
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
llvm::AMDGPU::HSAMD::AccessQualifier::Default
@ Default
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:425
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getValueKind
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
Definition: AMDGPUHSAMetadataStreamer.cpp:535
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::msgpack::ArrayDocNode
A DocNode that is an array.
Definition: MsgPackDocument.h:249
llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:25
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:525
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getTypeName
std::string getTypeName(Type *Ty, bool Signed) const
Definition: AMDGPUHSAMetadataStreamer.cpp:562
llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Private
@ Private
Version
uint64_t Version
Definition: RawMemProfReader.cpp:25
llvm::AMDGPU::HSAMD::MetadataStreamerV2::begin
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
Definition: AMDGPUHSAMetadataStreamer.cpp:438
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::Module::getNamedMetadata
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:254
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::AMDGPU::HSAMD::MetadataStreamerV3::getAccessQualifier
Optional< StringRef > getAccessQualifier(StringRef AccQual) const
Definition: AMDGPUHSAMetadataStreamer.cpp:507
llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitVersion
void emitVersion()
Definition: AMDGPUHSAMetadataStreamer.cpp:611
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
llvm::AMDGPU::HSAMD::MetadataStreamerV2::emitKernel
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
Definition: AMDGPUHSAMetadataStreamer.cpp:455
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::cl::desc
Definition: CommandLine.h:412
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::AMDGPU::HSAMD::VersionMinorV3
constexpr uint32_t VersionMinorV3
HSA metadata minor version for code object V3.
Definition: AMDGPUMetadata.h:40
llvm::raw_string_ostream::str
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:650
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::AMDGPU::HSAMD::ValueKind::HiddenGlobalOffsetX
@ HiddenGlobalOffsetX
llvm::AMDGPU::HSAMD::VersionMinorV4
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
Definition: AMDGPUMetadata.h:45
llvm::AMDGPU::HSAMD::toString
std::error_code toString(Metadata HSAMetadata, std::string &String)
Converts HSAMetadata to String.
Definition: AMDGPUMetadata.cpp:219