Line data Source code
1 : //===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : ///
12 : /// Generates AMDGPU runtime metadata for YAML mapping.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "AMDGPU.h"
17 : #include "AMDGPURuntimeMetadata.h"
18 : #include "MCTargetDesc/AMDGPURuntimeMD.h"
19 : #include "Utils/AMDGPUBaseInfo.h"
20 : #include "llvm/ADT/SmallVector.h"
21 : #include "llvm/ADT/StringRef.h"
22 : #include "llvm/ADT/StringSwitch.h"
23 : #include "llvm/ADT/Twine.h"
24 : #include "llvm/IR/Constants.h"
25 : #include "llvm/IR/DataLayout.h"
26 : #include "llvm/IR/DerivedTypes.h"
27 : #include "llvm/IR/Function.h"
28 : #include "llvm/IR/Metadata.h"
29 : #include "llvm/IR/Module.h"
30 : #include "llvm/IR/Type.h"
31 : #include "llvm/Support/Casting.h"
32 : #include "llvm/Support/CommandLine.h"
33 : #include "llvm/Support/raw_ostream.h"
34 : #include "llvm/Support/YAMLTraits.h"
35 : #include <cassert>
36 : #include <cstdint>
37 : #include <limits>
38 : #include <vector>
39 :
40 : using namespace llvm;
41 : using namespace llvm::AMDGPU::IsaInfo;
42 : using namespace ::AMDGPU::RuntimeMD;
43 :
44 : static cl::opt<bool>
45 128550 : DumpRuntimeMD("amdgpu-dump-rtmd",
46 192825 : cl::desc("Dump AMDGPU runtime metadata"));
47 :
48 : static cl::opt<bool>
49 128550 : CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
50 192825 : cl::desc("Check AMDGPU runtime metadata YAML parser"));
51 :
52 : LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
53 : LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
54 : LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
55 : LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
56 : LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
57 :
58 : namespace llvm {
59 : namespace yaml {
60 :
61 : template <> struct MappingTraits<KernelArg::Metadata> {
62 237 : static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
63 474 : YamlIO.mapRequired(KeyName::ArgSize, A.Size);
64 474 : YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
65 474 : YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
66 474 : YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
67 474 : YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
68 711 : YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
69 711 : YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
70 474 : YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
71 474 : YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
72 474 : YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
73 474 : YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
74 474 : YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
75 474 : YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
76 237 : }
77 : static const bool flow = true;
78 : };
79 :
80 : template <> struct MappingTraits<Kernel::Metadata> {
81 50 : static void mapping(IO &YamlIO, Kernel::Metadata &K) {
82 100 : YamlIO.mapRequired(KeyName::KernelName, K.Name);
83 150 : YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
84 100 : YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
85 100 : YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
86 100 : YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
87 150 : YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
88 50 : YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
89 50 : INVALID_KERNEL_INDEX);
90 100 : YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
91 50 : uint8_t(0));
92 100 : YamlIO.mapOptional(KeyName::Args, K.Args);
93 50 : }
94 : static const bool flow = true;
95 : };
96 :
97 : template <> struct MappingTraits<IsaInfo::Metadata> {
98 173 : static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
99 346 : YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
100 346 : YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
101 346 : YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
102 346 : YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
103 173 : YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
104 173 : I.MaxFlatWorkGroupSize);
105 346 : YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
106 346 : YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
107 173 : YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
108 173 : I.AddressableNumSGPRs);
109 346 : YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
110 346 : YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
111 173 : YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
112 173 : I.AddressableNumVGPRs);
113 173 : }
114 : static const bool flow = true;
115 : };
116 :
117 : template <> struct MappingTraits<Program::Metadata> {
118 188 : static void mapping(IO &YamlIO, Program::Metadata &Prog) {
119 376 : YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
120 376 : YamlIO.mapOptional(KeyName::IsaInfo, Prog.IsaInfo);
121 376 : YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
122 376 : YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
123 188 : }
124 : static const bool flow = true;
125 : };
126 :
127 : } // end namespace yaml
128 : } // end namespace llvm
129 :
130 : // Get a vector of three integer values from MDNode \p Node;
131 2 : static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
132 : assert(Node->getNumOperands() == 3);
133 2 : std::vector<uint32_t> V;
134 8 : for (const MDOperand &Op : Node->operands()) {
135 6 : const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
136 12 : V.push_back(CI->getZExtValue());
137 : }
138 2 : return V;
139 : }
140 :
141 11 : static std::string getOCLTypeName(Type *Ty, bool Signed) {
142 11 : switch (Ty->getTypeID()) {
143 : case Type::HalfTyID:
144 2 : return "half";
145 : case Type::FloatTyID:
146 2 : return "float";
147 : case Type::DoubleTyID:
148 2 : return "double";
149 : case Type::IntegerTyID: {
150 6 : if (!Signed)
151 5 : return (Twine('u') + getOCLTypeName(Ty, true)).str();
152 5 : unsigned BW = Ty->getIntegerBitWidth();
153 5 : switch (BW) {
154 : case 8:
155 2 : return "char";
156 : case 16:
157 2 : return "short";
158 : case 32:
159 4 : return "int";
160 : case 64:
161 2 : return "long";
162 : default:
163 0 : return (Twine('i') + Twine(BW)).str();
164 : }
165 : }
166 : case Type::VectorTyID: {
167 1 : VectorType *VecTy = cast<VectorType>(Ty);
168 1 : Type *EleTy = VecTy->getElementType();
169 2 : unsigned Size = VecTy->getVectorNumElements();
170 4 : return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
171 : }
172 : default:
173 2 : return "unknown";
174 : }
175 : }
176 :
177 173 : static KernelArg::ValueType getRuntimeMDValueType(
178 : Type *Ty, StringRef TypeName) {
179 244 : switch (Ty->getTypeID()) {
180 : case Type::HalfTyID:
181 : return KernelArg::F16;
182 : case Type::FloatTyID:
183 1 : return KernelArg::F32;
184 : case Type::DoubleTyID:
185 1 : return KernelArg::F64;
186 : case Type::IntegerTyID: {
187 322 : bool Signed = !TypeName.startswith("u");
188 161 : switch (Ty->getIntegerBitWidth()) {
189 : case 8:
190 39 : return Signed ? KernelArg::I8 : KernelArg::U8;
191 : case 16:
192 2 : return Signed ? KernelArg::I16 : KernelArg::U16;
193 : case 32:
194 21 : return Signed ? KernelArg::I32 : KernelArg::U32;
195 : case 64:
196 98 : return Signed ? KernelArg::I64 : KernelArg::U64;
197 : default:
198 : // Runtime does not recognize other integer types. Report as struct type.
199 : return KernelArg::Struct;
200 : }
201 : }
202 : case Type::VectorTyID:
203 28 : return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
204 : case Type::PointerTyID:
205 114 : return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
206 : default:
207 9 : return KernelArg::Struct;
208 : }
209 : }
210 :
211 : static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
212 : AMDGPUAS::AddressSpaces A) {
213 : switch (A) {
214 : case AMDGPUAS::GLOBAL_ADDRESS:
215 : return KernelArg::Global;
216 : case AMDGPUAS::CONSTANT_ADDRESS:
217 : return KernelArg::Constant;
218 : case AMDGPUAS::LOCAL_ADDRESS:
219 : return KernelArg::Local;
220 : case AMDGPUAS::FLAT_ADDRESS:
221 : return KernelArg::Generic;
222 : case AMDGPUAS::REGION_ADDRESS:
223 : return KernelArg::Region;
224 : default:
225 : return KernelArg::Private;
226 : }
227 : }
228 :
229 173 : static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
230 : Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
231 : StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
232 : StringRef AccQual = "") {
233 173 : KernelArg::Metadata Arg;
234 :
235 : // Set ArgSize and ArgAlign.
236 173 : Arg.Size = DL.getTypeAllocSize(T);
237 173 : Arg.Align = DL.getABITypeAlignment(T);
238 173 : if (auto PT = dyn_cast<PointerType>(T)) {
239 55 : auto ET = PT->getElementType();
240 55 : if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
241 7 : Arg.PointeeAlign = DL.getABITypeAlignment(ET);
242 : }
243 :
244 : // Set ArgTypeName.
245 346 : Arg.TypeName = TypeName;
246 :
247 : // Set ArgName.
248 346 : Arg.Name = ArgName;
249 :
250 : // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
251 346 : SmallVector<StringRef, 1> SplitQ;
252 173 : TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
253 :
254 523 : for (StringRef KeyName : SplitQ) {
255 4 : auto *P = StringSwitch<uint8_t *>(KeyName)
256 12 : .Case("volatile", &Arg.IsVolatile)
257 12 : .Case("restrict", &Arg.IsRestrict)
258 12 : .Case("const", &Arg.IsConst)
259 12 : .Case("pipe", &Arg.IsPipe)
260 4 : .Default(nullptr);
261 4 : if (P)
262 4 : *P = 1;
263 : }
264 :
265 : // Set ArgKind.
266 173 : Arg.Kind = Kind;
267 :
268 : // Set ArgValueType.
269 173 : Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
270 :
271 : // Set ArgAccQual.
272 173 : if (!AccQual.empty()) {
273 92 : Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
274 138 : .Case("read_only", KernelArg::ReadOnly)
275 138 : .Case("write_only", KernelArg::WriteOnly)
276 138 : .Case("read_write", KernelArg::ReadWrite)
277 46 : .Default(KernelArg::AccNone);
278 : }
279 :
280 : // Set ArgAddrQual.
281 173 : if (auto *PT = dyn_cast<PointerType>(T)) {
282 55 : Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
283 55 : PT->getAddressSpace()));
284 : }
285 :
286 173 : return Arg;
287 : }
288 :
289 32 : static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
290 32 : Kernel::Metadata Kernel;
291 96 : Kernel.Name = F.getName();
292 32 : auto &M = *F.getParent();
293 :
294 : // Set Language and LanguageVersion.
295 32 : if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
296 32 : if (MD->getNumOperands() != 0) {
297 32 : auto Node = MD->getOperand(0);
298 32 : if (Node->getNumOperands() > 1) {
299 64 : Kernel.Language = "OpenCL C";
300 : uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
301 96 : ->getZExtValue();
302 : uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
303 96 : ->getZExtValue();
304 64 : Kernel.LanguageVersion.push_back(Major);
305 64 : Kernel.LanguageVersion.push_back(Minor);
306 : }
307 : }
308 : }
309 :
310 32 : const DataLayout &DL = F.getParent()->getDataLayout();
311 78 : for (auto &Arg : F.args()) {
312 46 : unsigned I = Arg.getArgNo();
313 46 : Type *T = Arg.getType();
314 : auto TypeName = dyn_cast<MDString>(F.getMetadata(
315 138 : "kernel_arg_type")->getOperand(I))->getString();
316 : auto BaseTypeName = cast<MDString>(F.getMetadata(
317 138 : "kernel_arg_base_type")->getOperand(I))->getString();
318 46 : StringRef ArgName;
319 46 : if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
320 0 : ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
321 : auto TypeQual = cast<MDString>(F.getMetadata(
322 138 : "kernel_arg_type_qual")->getOperand(I))->getString();
323 : auto AccQual = cast<MDString>(F.getMetadata(
324 138 : "kernel_arg_access_qual")->getOperand(I))->getString();
325 : KernelArg::Kind Kind;
326 46 : if (TypeQual.find("pipe") != StringRef::npos)
327 : Kind = KernelArg::Pipe;
328 45 : else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
329 135 : .Case("sampler_t", KernelArg::Sampler)
330 135 : .Case("queue_t", KernelArg::Queue)
331 : .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
332 135 : "image2d_t" , "image2d_array_t", KernelArg::Image)
333 : .Cases("image2d_depth_t", "image2d_array_depth_t",
334 : "image2d_msaa_t", "image2d_array_msaa_t",
335 135 : "image2d_msaa_depth_t", KernelArg::Image)
336 : .Cases("image2d_array_msaa_depth_t", "image3d_t",
337 135 : KernelArg::Image)
338 90 : .Default(isa<PointerType>(T) ?
339 46 : (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
340 : KernelArg::DynamicSharedPointer :
341 : KernelArg::GlobalBuffer) :
342 45 : KernelArg::ByValue);
343 92 : Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
344 46 : BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
345 : }
346 :
347 : // Emit hidden kernel arguments for OpenCL kernels.
348 32 : if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
349 32 : auto Int64T = Type::getInt64Ty(F.getContext());
350 192 : Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
351 32 : KernelArg::HiddenGlobalOffsetX));
352 192 : Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
353 32 : KernelArg::HiddenGlobalOffsetY));
354 192 : Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
355 32 : KernelArg::HiddenGlobalOffsetZ));
356 32 : if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
357 31 : auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
358 31 : KernelArg::Global);
359 186 : Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
360 31 : KernelArg::HiddenPrintfBuffer));
361 : }
362 : }
363 :
364 : // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
365 32 : if (auto RWGS = F.getMetadata("reqd_work_group_size"))
366 3 : Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
367 :
368 32 : if (auto WGSH = F.getMetadata("work_group_size_hint"))
369 3 : Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
370 :
371 32 : if (auto VTH = F.getMetadata("vec_type_hint"))
372 45 : Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
373 : VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
374 36 : VTH->getOperand(1))->getZExtValue());
375 :
376 32 : return Kernel;
377 : }
378 :
379 185 : static void getIsaInfo(const FeatureBitset &Features, IsaInfo::Metadata &IIM) {
380 185 : IIM.WavefrontSize = getWavefrontSize(Features);
381 185 : IIM.LocalMemorySize = getLocalMemorySize(Features);
382 185 : IIM.EUsPerCU = getEUsPerCU(Features);
383 185 : IIM.MaxWavesPerEU = getMaxWavesPerEU(Features);
384 185 : IIM.MaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features);
385 185 : IIM.SGPRAllocGranule = getSGPRAllocGranule(Features);
386 185 : IIM.TotalNumSGPRs = getTotalNumSGPRs(Features);
387 185 : IIM.AddressableNumSGPRs = getAddressableNumSGPRs(Features);
388 185 : IIM.VGPRAllocGranule = getVGPRAllocGranule(Features);
389 185 : IIM.TotalNumVGPRs = getTotalNumVGPRs(Features);
390 185 : IIM.AddressableNumVGPRs = getAddressableNumVGPRs(Features);
391 185 : }
392 :
393 0 : Program::Metadata::Metadata(const std::string &YAML) {
394 0 : yaml::Input Input(YAML);
395 0 : Input >> *this;
396 0 : }
397 :
398 166 : std::string Program::Metadata::toYAML() {
399 332 : std::string Text;
400 332 : raw_string_ostream Stream(Text);
401 : yaml::Output Output(Stream, nullptr,
402 332 : std::numeric_limits<int>::max() /* do not wrap line */);
403 166 : Output << *this;
404 498 : return Stream.str();
405 : }
406 :
407 0 : Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
408 0 : return Program::Metadata(S);
409 : }
410 :
411 : // Check if the YAML string can be parsed.
412 0 : static void checkRuntimeMDYAMLString(const std::string &YAML) {
413 0 : auto P = Program::Metadata::fromYAML(YAML);
414 0 : auto S = P.toYAML();
415 0 : errs() << "AMDGPU runtime metadata parser test "
416 0 : << (YAML == S ? "passes" : "fails") << ".\n";
417 0 : if (YAML != S) {
418 0 : errs() << "First output: " << YAML << '\n'
419 0 : << "Second output: " << S << '\n';
420 : }
421 0 : }
422 :
423 163 : std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
424 : const Module &M) {
425 326 : Program::Metadata Prog;
426 163 : Prog.MDVersionSeq.push_back(MDVersion);
427 163 : Prog.MDVersionSeq.push_back(MDRevision);
428 :
429 163 : getIsaInfo(Features, Prog.IsaInfo);
430 :
431 : // Set PrintfInfo.
432 163 : if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
433 5 : for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
434 2 : auto Node = MD->getOperand(I);
435 2 : if (Node->getNumOperands() > 0)
436 4 : Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
437 8 : ->getString());
438 : }
439 : }
440 :
441 : // Set Kernels.
442 2440 : for (auto &F: M.functions()) {
443 2277 : if (!F.getMetadata("kernel_arg_type"))
444 : continue;
445 32 : Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
446 : }
447 :
448 163 : auto YAML = Prog.toYAML();
449 :
450 163 : if (DumpRuntimeMD)
451 0 : errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
452 :
453 163 : if (CheckRuntimeMDParser)
454 0 : checkRuntimeMDYAMLString(YAML);
455 :
456 163 : return YAML;
457 : }
458 :
459 22 : ErrorOr<std::string> llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
460 : StringRef YAML) {
461 44 : Program::Metadata Prog;
462 44 : yaml::Input Input(YAML);
463 22 : Input >> Prog;
464 :
465 22 : getIsaInfo(Features, Prog.IsaInfo);
466 :
467 22 : if (Input.error())
468 19 : return Input.error();
469 9 : return Prog.toYAML();
470 192825 : }
|