LLVM 20.0.0git
Utility.cpp
Go to the documentation of this file.
1//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
13#include "llvm/IR/Constants.h"
14#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Value.h"
20
21using namespace llvm;
22using namespace llvm::offloading;
23
25 LLVMContext &C = M.getContext();
26 StructType *EntryTy =
27 StructType::getTypeByName(C, "struct.__tgt_offload_entry");
28 if (!EntryTy)
29 EntryTy = StructType::create(
30 "struct.__tgt_offload_entry", PointerType::getUnqual(C),
31 PointerType::getUnqual(C), M.getDataLayout().getIntPtrType(C),
33 return EntryTy;
34}
35
36// TODO: Rework this interface to be more generic.
37std::pair<Constant *, GlobalVariable *>
40 int32_t Flags, int32_t Data) {
41 llvm::Triple Triple(M.getTargetTriple());
42 Type *Int8PtrTy = PointerType::getUnqual(M.getContext());
43 Type *Int32Ty = Type::getInt32Ty(M.getContext());
44 Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());
45
46 Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
47
48 StringRef Prefix =
49 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
50
51 // Create the constant string used to look up the symbol in the device.
52 auto *Str =
53 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
54 GlobalValue::InternalLinkage, AddrName, Prefix);
55 StringRef SectionName = ".llvm.rodata.offloading";
56 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
57 Str->setSection(SectionName);
58 Str->setAlignment(Align(1));
59
60 // Make a metadata node for these constants so it can be queried from IR.
61 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
62 Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
63 MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
64
65 // Construct the offloading entry.
66 Constant *EntryData[] = {
69 ConstantInt::get(SizeTy, Size),
70 ConstantInt::get(Int32Ty, Flags),
71 ConstantInt::get(Int32Ty, Data),
72 };
73 Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
74 return {EntryInitializer, Str};
75}
76
78 uint64_t Size, int32_t Flags, int32_t Data,
80 llvm::Triple Triple(M.getTargetTriple());
81
82 auto [EntryInitializer, NameGV] =
84
85 StringRef Prefix =
86 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
87 auto *Entry = new GlobalVariable(
88 M, getEntryTy(M),
89 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
90 Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
91 M.getDataLayout().getDefaultGlobalsAddressSpace());
92
93 // The entry has to be created in the section the linker expects it to be.
95 Entry->setSection((SectionName + "$OE").str());
96 else
97 Entry->setSection(SectionName);
98 Entry->setAlignment(Align(1));
99}
100
101std::pair<GlobalVariable *, GlobalVariable *>
103 llvm::Triple Triple(M.getTargetTriple());
104
105 auto *ZeroInitilaizer =
106 ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
107 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
108 auto *EntryType = ArrayType::get(getEntryTy(M), 0);
111
112 auto *EntriesB =
113 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
114 "__start_" + SectionName);
115 EntriesB->setVisibility(GlobalValue::HiddenVisibility);
116 auto *EntriesE =
117 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
118 "__stop_" + SectionName);
119 EntriesE->setVisibility(GlobalValue::HiddenVisibility);
120
121 if (Triple.isOSBinFormatELF()) {
122 // We assume that external begin/end symbols that we have created above will
123 // be defined by the linker. This is done whenever a section name with a
124 // valid C-identifier is present. We define a dummy variable here to force
125 // the linker to always provide these symbols.
126 auto *DummyEntry = new GlobalVariable(
127 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
128 ZeroInitilaizer, "__dummy." + SectionName);
129 DummyEntry->setSection(SectionName);
130 appendToCompilerUsed(M, DummyEntry);
131 } else {
132 // The COFF linker will merge sections containing a '$' together into a
133 // single section. The order of entries in this section will be sorted
134 // alphabetically by the characters following the '$' in the name. Set the
135 // sections here to ensure that the beginning and end symbols are sorted.
136 EntriesB->setSection((SectionName + "$OA").str());
137 EntriesE->setSection((SectionName + "$OZ").str());
138 }
139
140 return std::make_pair(EntriesB, EntriesE);
141}
142
144 uint32_t ImageFlags,
145 StringRef EnvTargetID) {
146 using namespace llvm::ELF;
147 StringRef EnvArch = EnvTargetID.split(":").first;
148
149 // Trivial check if the base processors match.
150 if (EnvArch != ImageArch)
151 return false;
152
153 // Check if the image is requesting xnack on or off.
154 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
155 case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
156 // The image is 'xnack-' so the environment must be 'xnack-'.
157 if (!EnvTargetID.contains("xnack-"))
158 return false;
159 break;
160 case EF_AMDGPU_FEATURE_XNACK_ON_V4:
161 // The image is 'xnack+' so the environment must be 'xnack+'.
162 if (!EnvTargetID.contains("xnack+"))
163 return false;
164 break;
165 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
166 case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
167 default:
168 break;
169 }
170
171 // Check if the image is requesting sramecc on or off.
172 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
173 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
174 // The image is 'sramecc-' so the environment must be 'sramecc-'.
175 if (!EnvTargetID.contains("sramecc-"))
176 return false;
177 break;
178 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
179 // The image is 'sramecc+' so the environment must be 'sramecc+'.
180 if (!EnvTargetID.contains("sramecc+"))
181 return false;
182 break;
183 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
184 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
185 break;
186 }
187
188 return true;
189}
190
191namespace {
192/// Reads the AMDGPU specific per-kernel-metadata from an image.
193class KernelInfoReader {
194public:
196 : KernelInfoMap(KIM) {}
197
198 /// Process ELF note to read AMDGPU metadata from respective information
199 /// fields.
200 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
201 if (Note.getName() != "AMDGPU")
202 return Error::success(); // We are not interested in other things
203
204 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
205 "Parse AMDGPU MetaData");
206 auto Desc = Note.getDesc(Align);
207 StringRef MsgPackString =
208 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
209 msgpack::Document MsgPackDoc;
210 if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
211 return Error::success();
212
214 if (!Verifier.verify(MsgPackDoc.getRoot()))
215 return Error::success();
216
217 auto RootMap = MsgPackDoc.getRoot().getMap(true);
218
219 if (auto Err = iterateAMDKernels(RootMap))
220 return Err;
221
222 return Error::success();
223 }
224
225private:
226 /// Extracts the relevant information via simple string look-up in the msgpack
227 /// document elements.
228 Error
229 extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
230 std::string &KernelName,
232 if (!V.first.isString())
233 return Error::success();
234
235 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
236 return DK.getString() == SK;
237 };
238
239 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
240 uint32_t *Vals) {
241 assert(DN.isArray() && "MsgPack DocNode is an array node");
242 auto DNA = DN.getArray();
243 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
244
245 int I = 0;
246 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
247 ++DNABegin) {
248 Vals[I++] = DNABegin->getUInt();
249 }
250 };
251
252 if (IsKey(V.first, ".name")) {
253 KernelName = V.second.toString();
254 } else if (IsKey(V.first, ".sgpr_count")) {
255 KernelData.SGPRCount = V.second.getUInt();
256 } else if (IsKey(V.first, ".sgpr_spill_count")) {
257 KernelData.SGPRSpillCount = V.second.getUInt();
258 } else if (IsKey(V.first, ".vgpr_count")) {
259 KernelData.VGPRCount = V.second.getUInt();
260 } else if (IsKey(V.first, ".vgpr_spill_count")) {
261 KernelData.VGPRSpillCount = V.second.getUInt();
262 } else if (IsKey(V.first, ".agpr_count")) {
263 KernelData.AGPRCount = V.second.getUInt();
264 } else if (IsKey(V.first, ".private_segment_fixed_size")) {
265 KernelData.PrivateSegmentSize = V.second.getUInt();
266 } else if (IsKey(V.first, ".group_segment_fixed_size")) {
267 KernelData.GroupSegmentList = V.second.getUInt();
268 } else if (IsKey(V.first, ".reqd_workgroup_size")) {
269 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
270 } else if (IsKey(V.first, ".workgroup_size_hint")) {
271 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
272 } else if (IsKey(V.first, ".wavefront_size")) {
273 KernelData.WavefrontSize = V.second.getUInt();
274 } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
275 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
276 }
277
278 return Error::success();
279 }
280
281 /// Get the "amdhsa.kernels" element from the msgpack Document
283 auto Res = MDN.find("amdhsa.kernels");
284 if (Res == MDN.end())
286 "Could not find amdhsa.kernels key");
287
288 auto Pair = *Res;
289 assert(Pair.second.isArray() &&
290 "AMDGPU kernel entries are arrays of entries");
291
292 return Pair.second.getArray();
293 }
294
295 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
296 /// MapDocNode that either maps a string to a single value (most of them) or
297 /// to another array of things. Currently, we only handle the case that maps
298 /// to scalar value.
299 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
301 std::string KernelName;
302 auto Entry = (*It).getMap();
303 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
304 if (auto Err = extractKernelData(*MI, KernelName, KernelData))
305 return Err;
306
307 KernelInfoMap.insert({KernelName, KernelData});
308 return Error::success();
309 }
310
311 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
312 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
313 auto KernelsOrErr = getAMDKernelsArray(MDN);
314 if (auto Err = KernelsOrErr.takeError())
315 return Err;
316
317 auto KernelsArr = *KernelsOrErr;
318 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
319 if (!It->isMap())
320 continue; // we expect <key,value> pairs
321
322 // Obtain the value for the different entries. Each array entry is a
323 // MapDocNode
324 if (auto Err = generateKernelInfo(It))
325 return Err;
326 }
327 return Error::success();
328 }
329
330 // Kernel names are the keys
332};
333} // namespace
334
336 MemoryBufferRef MemBuffer,
338 uint16_t &ELFABIVersion) {
339 Error Err = Error::success(); // Used later as out-parameter
340
341 auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
342 if (auto Err = ELFOrError.takeError())
343 return Err;
344
345 const object::ELF64LEFile ELFObj = ELFOrError.get();
347 if (!Sections)
348 return Sections.takeError();
349 KernelInfoReader Reader(KernelInfoMap);
350
351 // Read the code object version from ELF image header
352 auto Header = ELFObj.getHeader();
353 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
354 for (const auto &S : *Sections) {
355 if (S.sh_type != ELF::SHT_NOTE)
356 continue;
357
358 for (const auto N : ELFObj.notes(S, Err)) {
359 if (Err)
360 return Err;
361 // Fills the KernelInfoTabel entries in the reader
362 if ((Err = Reader.processNote(N, S.sh_addralign)))
363 return Err;
364 }
365 }
366 return Error::success();
367}
This is a verifier for AMDGPU HSA metadata, which can verify both well-typed metadata and untyped met...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Addr
std::string Name
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares a class that exposes a simple in-memory representation of a document of MsgPack ob...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
verify safepoint Safepoint IR Verifier
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:2990
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2268
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
This is an important base class in LLVM.
Definition: Constant.h:42
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
Tagged union holding either a T or a Error.
Definition: Error.h:481
Error takeError()
Take ownership of the stored error.
Definition: Error.h:608
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:57
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
StringRef getBuffer() const
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A tuple of MDNodes.
Definition: Metadata.h:1731
void addOperand(MDNode *M)
Definition: Metadata.cpp:1431
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:128
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:424
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
Definition: Type.cpp:731
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:612
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:735
bool isNVPTX() const
Tests whether the target is NVPTX (32- or 64-bit).
Definition: Triple.h:855
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:730
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt32Ty(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
A node in a MsgPack Document.
MapDocNode & getMap(bool Convert=false)
Get a MapDocNode for a map node.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
StringRef getString() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
bool readFromBlob(StringRef Blob, bool Multi, function_ref< int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)> Merger=[](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { return -1;})
Read a document from a binary msgpack blob, merging into anything already in the Document.
A DocNode that is a map.
MapTy::iterator find(DocNode Key)
const Elf_Ehdr & getHeader() const
Definition: ELF.h:279
static Expected< ELFFile > create(StringRef Object)
Definition: ELF.h:888
iterator_range< Elf_Note_Iterator > notes(const Elf_Phdr &Phdr, Error &Err) const
Get an iterator range over notes of a program header.
Definition: ELF.h:462
Expected< Elf_Shdr_Range > sections() const
Definition: ELF.h:925
@ Entry
Definition: COFF.h:844
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Definition: ELF.h:28
@ EI_ABIVERSION
Definition: ELF.h:57
@ SHT_NOTE
Definition: ELF.h:1101
@ NT_AMDGPU_METADATA
Definition: ELF.h:1918
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer, StringMap< AMDGPUKernelMetaData > &KernelInfoMap, uint16_t &ELFABIVersion)
Reads AMDGPU specific metadata from the ELF file and propagates the KernelInfoMap.
Definition: Utility.cpp:335
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags, StringRef EnvTargetID)
Check if an image is compatible with current system's environment.
Definition: Utility.cpp:143
std::pair< Constant *, GlobalVariable * > getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data)
Create a constant struct initializer used to register this global at runtime.
Definition: Utility.cpp:38
StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
Definition: Utility.cpp:24
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
Definition: Utility.cpp:77
std::pair< GlobalVariable *, GlobalVariable * > getOffloadEntryArray(Module &M, StringRef SectionName)
Creates a pair of globals used to iterate the array of offloading entries by accessing the section va...
Definition: Utility.cpp:102
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition: Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1291
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
Struct for holding metadata related to AMDGPU kernels, for more information about the metadata and it...
Definition: Utility.h:98
uint32_t SGPRSpillCount
Number of stores from a scalar register to a register allocator created spill location.
Definition: Utility.h:113
uint32_t SGPRCount
Number of scalar registers required by a wavefront.
Definition: Utility.h:108
uint32_t VGPRSpillCount
Number of stores from a vector register to a register allocator created spill location.
Definition: Utility.h:116
uint32_t VGPRCount
Number of vector registers required by each work-item.
Definition: Utility.h:110
uint32_t PrivateSegmentSize
The amount of fixed private address space memory required for a work-item in bytes.
Definition: Utility.h:106
uint32_t GroupSegmentList
The amount of group segment memory required by a work-group in bytes.
Definition: Utility.h:103
uint32_t MaxFlatWorkgroupSize
Maximum flat work-group size supported by the kernel in work-items.
Definition: Utility.h:127
uint32_t WorkgroupSizeHint[3]
Corresponds to the OpenCL work_group_size_hint attribute.
Definition: Utility.h:123
uint32_t AGPRCount
Number of accumulator registers required by each work-item.
Definition: Utility.h:118
uint32_t RequestedWorkgroupSize[3]
Corresponds to the OpenCL reqd_work_group_size attribute.
Definition: Utility.h:120