LLVM 22.0.0git
OffloadWrapper.cpp
Go to the documentation of this file.
1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/ADT/Twine.h"
16#include "llvm/IR/Constants.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/Type.h"
24#include "llvm/Support/Error.h"
30
31#include <memory>
32#include <utility>
33
34using namespace llvm;
35using namespace llvm::object;
36using namespace llvm::offloading;
37
38namespace {
39/// Magic number that begins the section containing the CUDA fatbinary.
40constexpr unsigned CudaFatMagic = 0x466243b1;
41constexpr unsigned HIPFatMagic = 0x48495046;
42
44 return M.getDataLayout().getIntPtrType(M.getContext());
45}
46
47// struct __tgt_device_image {
48// void *ImageStart;
49// void *ImageEnd;
50// __tgt_offload_entry *EntriesBegin;
51// __tgt_offload_entry *EntriesEnd;
52// };
// Returns the struct type named "__tgt_device_image" for the context of \p M.
// A type already registered under that name is reused; otherwise a new named
// struct is created.
53StructType *getDeviceImageTy(Module &M) {
54 LLVMContext &C = M.getContext();
// Look the type up by name first so repeated calls share one definition.
55 StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image");
56 if (!ImageTy)
57 ImageTy =
58 StructType::create("__tgt_device_image", PointerType::getUnqual(C),
// NOTE(review): the remaining StructType::create arguments are not visible in
// this listing (numbering jumps from 58 to 61) -- confirm against the
// upstream file.
61 return ImageTy;
62}
63
64PointerType *getDeviceImagePtrTy(Module &M) {
65 return PointerType::getUnqual(M.getContext());
66}
67
68// struct __tgt_bin_desc {
69// int32_t NumDeviceImages;
70// __tgt_device_image *DeviceImages;
71// __tgt_offload_entry *HostEntriesBegin;
72// __tgt_offload_entry *HostEntriesEnd;
73// };
// Returns the struct type named "__tgt_bin_desc" for the context of \p M,
// reusing an existing type of that name when the context already has one.
74StructType *getBinDescTy(Module &M) {
75 LLVMContext &C = M.getContext();
// Prefer the already-registered type so the name is not defined twice.
76 StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc");
77 if (!DescTy)
78 DescTy = StructType::create(
// The first two members are an i32 and the device image pointer type.
79 "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M),
// NOTE(review): the trailing StructType::create arguments are missing from
// this listing (numbering jumps from 79 to 81) -- confirm against upstream.
81 return DescTy;
82}
83
84PointerType *getBinDescPtrTy(Module &M) {
85 return PointerType::getUnqual(M.getContext());
86}
87
88/// Creates binary descriptor for the given device images. Binary descriptor
89/// is an object that is passed to the offloading runtime at program startup
90/// and it describes all device images available in the executable or shared
91/// library. It is defined as follows
92///
93/// __attribute__((visibility("hidden")))
94/// extern __tgt_offload_entry *__start_omp_offloading_entries;
95/// __attribute__((visibility("hidden")))
96/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
97///
98/// static const char Image0[] = { <Bufs.front() contents> };
99/// ...
100/// static const char ImageN[] = { <Bufs.back() contents> };
101///
102/// static const __tgt_device_image Images[] = {
103/// {
104/// Image0, /*ImageStart*/
105/// Image0 + sizeof(Image0), /*ImageEnd*/
106/// __start_omp_offloading_entries, /*EntriesBegin*/
107/// __stop_omp_offloading_entries /*EntriesEnd*/
108/// },
109/// ...
110/// {
111/// ImageN, /*ImageStart*/
112/// ImageN + sizeof(ImageN), /*ImageEnd*/
113/// __start_omp_offloading_entries, /*EntriesBegin*/
114/// __stop_omp_offloading_entries /*EntriesEnd*/
115/// }
116/// };
117///
118/// static const __tgt_bin_desc BinDesc = {
119/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
120/// Images, /*DeviceImages*/
121/// __start_omp_offloading_entries, /*HostEntriesBegin*/
122/// __stop_omp_offloading_entries /*HostEntriesEnd*/
123/// };
124///
125/// Global variable that represents BinDesc is returned.
// Emits one constant global per buffer in \p Bufs, an array of
// __tgt_device_image initializers referring to those globals, and finally the
// __tgt_bin_desc global, which is returned. \p EntryArray supplies the
// begin/end entry constants stored in every image record and in the
// descriptor; \p Suffix is appended to every generated global name.
126GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
127 EntryArrayTy EntryArray, StringRef Suffix,
128 bool Relocatable) {
129 LLVMContext &C = M.getContext();
130 auto [EntriesB, EntriesE] = EntryArray;
131
// {0, 0} index pair used to form a pointer to the first element of an array.
132 auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
133 Constant *ZeroZero[] = {Zero, Zero};
134
135 // Create initializer for the images array.
136 SmallVector<Constant *, 4u> ImagesInits;
137 ImagesInits.reserve(Bufs.size());
138 for (ArrayRef<char> Buf : Bufs) {
139 // We embed the full offloading entry so the binary utilities can parse it.
140 auto *Data = ConstantDataArray::get(C, Buf);
141 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
// NOTE(review): the linkage/initializer arguments (listing line 142) and the
// statements on listing lines 144 and 147 are not visible here.
143 ".omp_offloading.device_image" + Suffix);
// The section name depends on whether the output is relocatable.
145 Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
146 : ".llvm.offloading");
148
149 StringRef Binary(Buf.data(), Buf.size());
// NOTE(review): listing line 150 is missing; the string below looks like the
// message of a format check on Binary -- confirm against upstream.
151 "Invalid binary format");
152
153 // The device image struct contains the pointer to the beginning and end of
154 // the image stored inside of the offload binary. There should only be one
155 // of these for each buffer so we parse it out manually.
// The header is read directly from the start of the buffer and the entry from
// Header->EntryOffset bytes into it.
156 const auto *Header =
157 reinterpret_cast<const object::OffloadBinary::Header *>(
158 Binary.bytes_begin());
159 const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>(
160 Binary.bytes_begin() + Header->EntryOffset);
161
// Despite its name, `Size` holds the end offset (ImageOffset + ImageSize), so
// the two GEPs below give the start and one-past-the-end of the image bytes.
162 auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset);
163 auto *Size =
164 ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize);
165 Constant *ZeroBegin[] = {Zero, Begin};
166 Constant *ZeroSize[] = {Zero, Size};
167
168 auto *ImageB =
169 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin);
170 auto *ImageE =
171 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
172
// One {ImageStart, ImageEnd, EntriesBegin, EntriesEnd} record per buffer.
173 ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
174 ImageE, EntriesB, EntriesE));
175 }
176
177 // Then create images array.
178 auto *ImagesData = ConstantArray::get(
179 ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits);
180
181 auto *Images =
182 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
// NOTE(review): linkage/initializer arguments (listing line 183) not visible.
184 ".omp_offloading.device_images" + Suffix);
185 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
186
// Pointer to the first image record.
187 auto *ImagesB =
188 ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero);
189
190 // And finally create the binary descriptor object.
191 auto *DescInit = ConstantStruct::get(
192 getBinDescTy(M),
193 ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
194 EntriesB, EntriesE);
195
196 return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
// NOTE(review): linkage/initializer arguments (listing line 197) not visible.
198 ".omp_offloading.descriptor" + Suffix);
199}
200
// Creates a void() function in \p M, named
// ".omp_offloading.descriptor_unreg" + \p Suffix, whose body calls
// __tgt_unregister_lib(BinDesc) and returns. The new function is returned.
201Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
202 StringRef Suffix) {
203 LLVMContext &C = M.getContext();
204 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
205 auto *Func =
// NOTE(review): the start of this call (listing line 206) is not visible
// here, so the linkage used for the function cannot be confirmed.
207 ".omp_offloading.descriptor_unreg" + Suffix, &M);
208 Func->setSection(".text.startup");
209
210 // Get __tgt_unregister_lib function declaration.
211 auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
212 /*isVarArg*/ false);
213 FunctionCallee UnRegFuncC =
214 M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
215
216 // Construct function body
217 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
218 Builder.CreateCall(UnRegFuncC, BinDesc);
219 Builder.CreateRetVoid();
220
221 return Func;
222}
223
// Creates a void() function named ".omp_offloading.descriptor_reg" + \p Suffix
// that calls __tgt_register_lib(BinDesc) and then hands the matching
// unregister function to atexit. The function is appended to the module's
// global constructors with priority 101.
224void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
225 StringRef Suffix) {
226 LLVMContext &C = M.getContext();
227 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
// NOTE(review): the line that declares `Func` (listing line 228) is not
// visible here, so its linkage cannot be confirmed.
229 ".omp_offloading.descriptor_reg" + Suffix, &M);
230 Func->setSection(".text.startup");
231
232 // Get __tgt_register_lib function declaration.
233 auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
234 /*isVarArg*/ false);
235 FunctionCallee RegFuncC =
236 M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
237
// atexit is declared as i32(ptr).
238 auto *AtExitTy = FunctionType::get(
239 Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
240 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
241
242 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
243
244 // Construct function body
245 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
246
247 Builder.CreateCall(RegFuncC, BinDesc);
248
249 // Register the destructors with 'atexit'. This is expected by the CUDA
250 // runtime and ensures that we clean up before dynamic objects are destroyed.
251 // This needs to be done after plugin initialization to ensure that it is
252 // called before the plugin runtime is destroyed.
253 Builder.CreateCall(AtExit, UnregFunc);
254 Builder.CreateRetVoid();
255
256 // Add this function to constructors.
257 appendToGlobalCtors(M, Func, /*Priority=*/101);
258}
259
260// struct fatbin_wrapper {
261// int32_t magic;
262// int32_t version;
263// void *image;
264// void *reserved;
265//};
// Returns the struct type named "fatbin_wrapper" for the context of \p M,
// reusing an existing type of that name when the context already has one.
266StructType *getFatbinWrapperTy(Module &M) {
267 LLVMContext &C = M.getContext();
268 StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper");
269 if (!FatbinTy)
270 FatbinTy = StructType::create(
// The first two members are i32 values.
271 "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C),
// NOTE(review): the remaining member types (listing line 272) are not visible
// in this listing -- confirm against upstream.
273 return FatbinTy;
274}
275
276/// Embed the image \p Image into the module \p M so it can be found by the
277/// runtime.
// Emits \p Image as a constant global named ".fatbin_image" + \p Suffix and an
// internal constant "fatbin_wrapper" global named ".fatbin_wrapper" + \p Suffix
// that is returned. Section names and the magic value depend on \p IsHIP and,
// for the non-HIP case, on whether the target triple is macOS.
278GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
279 StringRef Suffix) {
280 LLVMContext &C = M.getContext();
281 llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
282 const llvm::Triple &Triple = M.getTargetTriple();
283
284 // Create the global string containing the fatbinary.
285 StringRef FatbinConstantSection =
286 IsHIP ? ".hip_fatbin"
287 : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
288 auto *Data = ConstantDataArray::get(C, Image);
289 auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
// NOTE(review): linkage/initializer arguments (listing line 290) not visible.
291 ".fatbin_image" + Suffix);
292 Fatbin->setSection(FatbinConstantSection);
293
294 // Create the fatbinary wrapper
295 StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
296 : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
297 : ".nvFatBinSegment";
// Wrapper fields: the magic number (HIP or CUDA) followed by the constant 1.
298 Constant *FatbinWrapper[] = {
299 ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
300 ConstantInt::get(Type::getInt32Ty(C), 1),
// NOTE(review): the remaining initializer elements and the closing brace
// (listing lines 301-302) are not visible in this listing.
303
304 Constant *FatbinInitializer =
305 ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper);
306
307 auto *FatbinDesc =
308 new GlobalVariable(M, getFatbinWrapperTy(M),
309 /*isConstant*/ true, GlobalValue::InternalLinkage,
310 FatbinInitializer, ".fatbin_wrapper" + Suffix);
311 FatbinDesc->setSection(FatbinWrapperSection);
// The wrapper is explicitly aligned to 8 bytes.
312 FatbinDesc->setAlignment(Align(8));
313
314 return FatbinDesc;
315}
316
317/// Create the register globals function. We will iterate all of the offloading
318/// entries stored at the begin / end symbols and register them according to
319/// their type. This creates the following function in IR:
320///
321/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
322/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
323///
324/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
325/// void *, void *, void *, void *, int *);
326/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
327/// int64_t, int32_t, int32_t);
328///
329/// void __cudaRegisterTest(void **fatbinHandle) {
330/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
331/// entry != &__stop_cuda_offloading_entries; ++entry) {
332/// if (entry->Kind != OFK_CUDA)
333/// continue
334///
335/// if (!entry->Size)
336/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
337/// entry->name, -1, 0, 0, 0, 0, 0);
338/// else
339/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
340/// 0, entry->size, 0, 0);
341/// }
342/// }
// Builds the function (named ".hip.globals_reg" or ".cuda.globals_reg") that
// walks the offload entries between the two constants in \p EntryArray and
// calls the matching __hip*/__cuda* registration routine for each one. The
// generated function takes a single pointer argument which is forwarded as
// the first argument of every registration call. Surface and texture entries
// only produce a call when \p EmitSurfacesAndTextures is true.
// NOTE(review): several lines of this definition are missing from the listing
// (gaps in the embedded numbering); the notes below mark the affected spots.
343Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
344 EntryArrayTy EntryArray,
345 StringRef Suffix,
346 bool EmitSurfacesAndTextures) {
347 LLVMContext &C = M.getContext();
348 auto [EntriesB, EntriesE] = EntryArray;
349
350 // Get the __cudaRegisterFunction function declaration.
// All three names below denote the same address-space-0 pointer type; the
// distinct names only document the intended pointee.
351 PointerType *Int8PtrTy = PointerType::get(C, 0);
352 PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
353 PointerType *Int32PtrTy = PointerType::get(C, 0);
354 auto *RegFuncTy = FunctionType::get(
// NOTE(review): the return type argument (listing line 355) is not visible.
356 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
357 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
358 /*isVarArg*/ false);
359 FunctionCallee RegFunc = M.getOrInsertFunction(
360 IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
361
362 // Get the __cudaRegisterVar function declaration.
363 auto *RegVarTy = FunctionType::get(
// NOTE(review): listing lines 364 and 366 (return type, trailing parameter
// types) are not visible.
365 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
367 /*isVarArg*/ false);
368 FunctionCallee RegVar = M.getOrInsertFunction(
369 IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
370
371 // Get the __cudaRegisterManagedVar function declaration.
372 FunctionType *RegManagedVarTy =
// NOTE(review): listing lines 373 and 375 are not visible.
374 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
376 /*isVarArg=*/false);
377 FunctionCallee RegManagedVar = M.getOrInsertFunction(
378 IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
379 RegManagedVarTy);
380
381 // Get the __cudaRegisterSurface function declaration.
382 FunctionType *RegSurfaceTy =
// NOTE(review): listing lines 383 and 385 are not visible.
384 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
386 /*isVarArg=*/false);
387 FunctionCallee RegSurface = M.getOrInsertFunction(
388 IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);
389
390 // Get the __cudaRegisterTexture function declaration.
391 FunctionType *RegTextureTy = FunctionType::get(
// NOTE(review): listing lines 392 and 394 are not visible.
393 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
395 /*isVarArg=*/false);
396 FunctionCallee RegTexture = M.getOrInsertFunction(
397 IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);
398
// The generated function is void(ptr).
399 auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
400 /*isVarArg*/ false);
401 auto *RegGlobalsFn =
// NOTE(review): the start of the Function::Create call (listing line 402) is
// not visible, so the linkage cannot be confirmed. Suffix does not appear in
// the visible part of the name expression.
403 IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
404 RegGlobalsFn->setSection(".text.startup");
405
406 // Create the loop to register all the entries.
407 IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
408 auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
409 auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn);
410 auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
411 auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
412 auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
413 auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
414 auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
415 auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
416 auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
417 auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);
418
// Skip the loop entirely when the begin and end constants are equal.
419 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
420 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
421 Builder.SetInsertPoint(EntryBB);
// `Entry` is the loop-carried pointer to the current entry; its two incoming
// values are added near the end of this function.
422 auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
// Load the individual fields of the current entry. The second GEP index
// selects the field inside the entry struct returned by getEntryTy.
423 auto *AddrPtr =
424 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
425 {ConstantInt::get(Type::getInt32Ty(C), 0),
426 ConstantInt::get(Type::getInt32Ty(C), 4)});
427 auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
428 auto *AuxAddrPtr =
429 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
430 {ConstantInt::get(Type::getInt32Ty(C), 0),
431 ConstantInt::get(Type::getInt32Ty(C), 8)});
432 auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr");
433 auto *KindPtr =
434 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
435 {ConstantInt::get(Type::getInt32Ty(C), 0),
436 ConstantInt::get(Type::getInt32Ty(C), 2)});
437 auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind");
438 auto *NamePtr =
439 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
440 {ConstantInt::get(Type::getInt32Ty(C), 0),
441 ConstantInt::get(Type::getInt32Ty(C), 5)});
442 auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
443 auto *SizePtr =
444 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
445 {ConstantInt::get(Type::getInt32Ty(C), 0),
446 ConstantInt::get(Type::getInt32Ty(C), 6)});
447 auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size");
448 auto *FlagsPtr =
449 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
450 {ConstantInt::get(Type::getInt32Ty(C), 0),
451 ConstantInt::get(Type::getInt32Ty(C), 3)});
452 auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
453 auto *DataPtr =
454 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
455 {ConstantInt::get(Type::getInt32Ty(C), 0),
456 ConstantInt::get(Type::getInt32Ty(C), 7)});
// The 64-bit data field is truncated before use.
// NOTE(review): the truncation target type (listing line 459) is not visible.
457 auto *Data = Builder.CreateTrunc(
458 Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"),
// The low three bits of the flags select the entry type used by the switch
// further down.
460 auto *Type = Builder.CreateAnd(
461 Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");
462
463 // Extract the flags stored in the bit-field and convert them to C booleans.
// NOTE(review): the mask operands (listing lines 466, 471, 476) are not
// visible; only the shift amounts 3, 4 and 5 can be read off here.
464 auto *ExternBit = Builder.CreateAnd(
465 Flags, ConstantInt::get(Type::getInt32Ty(C),
467 auto *Extern = Builder.CreateLShr(
468 ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
469 auto *ConstantBit = Builder.CreateAnd(
470 Flags, ConstantInt::get(Type::getInt32Ty(C),
472 auto *Const = Builder.CreateLShr(
473 ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
474 auto *NormalizedBit = Builder.CreateAnd(
475 Flags, ConstantInt::get(Type::getInt32Ty(C),
477 auto *Normalized = Builder.CreateLShr(
478 NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
// Entries whose kind does not match go straight to if.end.
// NOTE(review): the kind constant being compared against (listing lines
// 481-482) is not visible.
479 auto *KindCond = Builder.CreateICmpEQ(
480 Kind, ConstantInt::get(Type::getInt16Ty(C),
483 Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
484 Builder.SetInsertPoint(IfKindBB);
// NOTE(review): the operands of this comparison (listing line 486) are not
// visible; a true result selects the kernel registration path below.
485 auto *FnCond = Builder.CreateICmpEQ(
487 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
488
489 // Create kernel registration code.
490 Builder.SetInsertPoint(IfThenBB);
491 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
492 ConstantInt::get(Type::getInt32Ty(C), -1),
493 ConstantPointerNull::get(Int8PtrTy),
494 ConstantPointerNull::get(Int8PtrTy),
495 ConstantPointerNull::get(Int8PtrTy),
496 ConstantPointerNull::get(Int8PtrTy),
497 ConstantPointerNull::get(Int32PtrTy)});
498 Builder.CreateBr(IfEndBB);
499 Builder.SetInsertPoint(IfElseBB);
500
// Dispatch on the entry type; values without a case fall through to if.end.
501 auto *Switch = Builder.CreateSwitch(Type, IfEndBB);
502 // Create global variable registration code.
503 Builder.SetInsertPoint(SwGlobalBB);
504 Builder.CreateCall(RegVar,
505 {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
506 Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
507 Builder.CreateBr(IfEndBB);
508 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
509 SwGlobalBB);
510
511 // Create managed variable registration code.
512 Builder.SetInsertPoint(SwManagedBB);
513 Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
514 Name, Size, Data});
515 Builder.CreateBr(IfEndBB);
516 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
517 SwManagedBB);
518 // Create surface variable registration code.
// When EmitSurfacesAndTextures is false this block only branches to if.end.
519 Builder.SetInsertPoint(SwSurfaceBB);
520 if (EmitSurfacesAndTextures)
521 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
522 Data, Extern});
523 Builder.CreateBr(IfEndBB);
524 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
525 SwSurfaceBB);
526
527 // Create texture variable registration code.
// When EmitSurfacesAndTextures is false this block only branches to if.end.
528 Builder.SetInsertPoint(SwTextureBB);
529 if (EmitSurfacesAndTextures)
530 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
531 Data, Normalized, Extern});
532 Builder.CreateBr(IfEndBB);
533 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
534 SwTextureBB);
535
// Loop latch: advance to the next entry and leave once the end is reached.
536 Builder.SetInsertPoint(IfEndBB);
537 auto *NewEntry = Builder.CreateInBoundsGEP(
538 offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
539 auto *Cmp = Builder.CreateICmpEQ(
540 NewEntry,
// NOTE(review): the call that wraps these arguments (listing line 541) is not
// visible; presumably a constant GEP on EntriesE -- confirm against upstream.
542 ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
543 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
544 ConstantInt::get(getSizeTTy(M), 0)})));
// Incoming PHI values: a value derived from EntriesB when arriving from the
// function's entry block, and the advanced pointer when arriving from if.end.
545 Entry->addIncoming(
// NOTE(review): listing line 546 (the call wrapping these arguments) is not
// visible.
547 ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
548 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
549 ConstantInt::get(getSizeTTy(M), 0)})),
550 &RegGlobalsFn->getEntryBlock());
551 Entry->addIncoming(NewEntry, IfEndBB);
552 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
553 Builder.SetInsertPoint(ExitBB);
554 Builder.CreateRetVoid();
555
556 return RegGlobalsFn;
557}
558
559// Create the constructor and destructor to register the fatbinary with the CUDA
560// runtime.
// Emits a constructor/destructor pair for the fat binary described by
// \p FatbinDesc. The constructor registers the fat binary, stores the returned
// handle in an internal global, calls the globals registration function with
// that handle, calls __cudaRegisterFatBinaryEnd for the non-HIP case, and
// registers the destructor with atexit. The destructor reloads the handle and
// passes it to the unregister routine. Only the constructor is added to the
// module's global constructors (priority 101).
561void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
562 bool IsHIP, EntryArrayTy EntryArray,
563 StringRef Suffix,
564 bool EmitSurfacesAndTextures) {
565 LLVMContext &C = M.getContext();
566 auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
567 auto *CtorFunc = Function::Create(
// NOTE(review): the type/linkage arguments (listing line 568) are not visible.
569 (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
570 CtorFunc->setSection(".text.startup");
571
572 auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
573 auto *DtorFunc = Function::Create(
// NOTE(review): the type/linkage arguments (listing line 574) are not visible.
575 (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
576 DtorFunc->setSection(".text.startup");
577
578 auto *PtrTy = PointerType::getUnqual(C);
579
580 // Get the __cudaRegisterFatBinary function declaration.
581 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false);
582 FunctionCallee RegFatbin = M.getOrInsertFunction(
583 IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
584 // Get the __cudaRegisterFatBinaryEnd function declaration.
// This one is declared under its CUDA name regardless of IsHIP; it is only
// called below when IsHIP is false.
585 auto *RegFatEndTy =
586 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
587 FunctionCallee RegFatbinEnd =
588 M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy);
589 // Get the __cudaUnregisterFatBinary function declaration.
590 auto *UnregFatTy =
591 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
592 FunctionCallee UnregFatbin = M.getOrInsertFunction(
593 IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
594 UnregFatTy);
595
596 auto *AtExitTy =
597 FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false);
598 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
599
// Mutable internal global that carries the handle from the constructor to the
// destructor.
600 auto *BinaryHandleGlobal = new llvm::GlobalVariable(
601 M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
// NOTE(review): the initializer argument (listing line 602) is not visible.
603 (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
604
605 // Create the constructor to register this image with the runtime.
606 IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
607 CallInst *Handle = CtorBuilder.CreateCall(
608 RegFatbin,
// NOTE(review): the call argument (listing line 609) is not visible;
// presumably FatbinDesc -- confirm against upstream.
// The handle is stored with an alignment equal to the pointer size.
610 CtorBuilder.CreateAlignedStore(
611 Handle, BinaryHandleGlobal,
612 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
613 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
614 Suffix,
615 EmitSurfacesAndTextures),
616 Handle);
617 if (!IsHIP)
618 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
619 CtorBuilder.CreateCall(AtExit, DtorFunc);
620 CtorBuilder.CreateRetVoid();
621
622 // Create the destructor to unregister the image with the runtime. We cannot
623 // use a standard global destructor after CUDA 9.2 so this must be called by
624 // `atexit()` instead.
625 IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc));
626 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
627 PtrTy, BinaryHandleGlobal,
628 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
629 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
630 DtorBuilder.CreateRetVoid();
631
632 // Add this function to constructors.
633 appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
634}
635
636/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
637struct SYCLWrapper {
638 Module &M;
639 LLVMContext &C;
640 SYCLJITOptions Options;
641
642 StructType *EntryTy = nullptr;
643 StructType *SyclDeviceImageTy = nullptr;
644 StructType *SyclBinDescTy = nullptr;
645
646 SYCLWrapper(Module &M, const SYCLJITOptions &Options)
647 : M(M), C(M.getContext()), Options(Options) {
648 EntryTy = offloading::getEntryTy(M);
649 SyclDeviceImageTy = getSyclDeviceImageTy();
650 SyclBinDescTy = getSyclBinDescTy();
651 }
652
653 IntegerType *getSizeTTy() {
654 switch (M.getDataLayout().getPointerSize()) {
655 case 4:
656 return Type::getInt32Ty(C);
657 case 8:
658 return Type::getInt64Ty(C);
659 }
660 llvm_unreachable("unsupported pointer type size");
661 }
662
663 SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
664 IntegerType *SizeTTy = getSizeTTy();
665 return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
666 ConstantInt::get(SizeTTy, Second)};
667 }
668
669 /// Note: Properties aren't supported and the support is going
670 /// to be added later.
671 /// Creates a structure corresponding to:
672 /// SYCL specific image descriptor type.
673 /// \code
674 /// struct __sycl.tgt_device_image {
675 /// // version of this structure - for backward compatibility;
676 /// // all modifications which change order/type/offsets of existing fields
677 /// // should increment the version.
678 /// uint16_t Version;
679 /// // the kind of offload model the image employs.
680 /// uint8_t OffloadKind;
681 /// // format of the image data - SPIRV, LLVMIR bitcode, etc
682 /// uint8_t Format;
683 /// // null-terminated string representation of the device's target
684 /// // architecture
685 /// const char *Arch;
686 /// // a null-terminated string; target- and compiler-specific options
687 /// // which are suggested to use to "compile" program at runtime
688 /// const char *CompileOptions;
689 /// // a null-terminated string; target- and compiler-specific options
690 /// // which are suggested to use to "link" program at runtime
691 /// const char *LinkOptions;
692 /// // Pointer to the device binary image start
693 /// void *ImageStart;
694 /// // Pointer to the device binary image end
695 /// void *ImageEnd;
696 /// // the entry table
697 /// __tgt_offload_entry *EntriesBegin;
698 /// __tgt_offload_entry *EntriesEnd;
699 /// const char *PropertiesBegin;
700 /// const char *PropertiesEnd;
701 /// };
702 /// \endcode
703 StructType *getSyclDeviceImageTy() {
704 return StructType::create(
705 {
706 Type::getInt16Ty(C), // Version
707 Type::getInt8Ty(C), // OffloadKind
708 Type::getInt8Ty(C), // Format
709 PointerType::getUnqual(C), // Arch
710 PointerType::getUnqual(C), // CompileOptions
711 PointerType::getUnqual(C), // LinkOptions
712 PointerType::getUnqual(C), // ImageStart
713 PointerType::getUnqual(C), // ImageEnd
714 PointerType::getUnqual(C), // EntriesBegin
715 PointerType::getUnqual(C), // EntriesEnd
716 PointerType::getUnqual(C), // PropertiesBegin
717 PointerType::getUnqual(C) // PropertiesEnd
718 },
719 "__sycl.tgt_device_image");
720 }
721
722 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
723 /// to:
724 ///
725 /// \code
726 /// struct __sycl.tgt_bin_desc {
727 /// // version of this structure - for backward compatibility;
728 /// // all modifications which change order/type/offsets of existing fields
729 /// // should increment the version.
730 /// uint16_t Version;
731 /// uint16_t NumDeviceImages;
732 /// __sycl.tgt_device_image *DeviceImages;
733 /// // the offload entry table
734 /// __tgt_offload_entry *HostEntriesBegin;
735 /// __tgt_offload_entry *HostEntriesEnd;
736 /// };
737 /// \endcode
738 StructType *getSyclBinDescTy() {
739 return StructType::create(
740 {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
741 PointerType::getUnqual(C), PointerType::getUnqual(C)},
742 "__sycl.tgt_bin_desc");
743 }
744
745 /// Adds a global readonly variable that is initialized by given
746 /// \p Initializer to the module.
747 GlobalVariable *addGlobalArrayVariable(const Twine &Name,
748 ArrayRef<char> Initializer,
749 const Twine &Section = "") {
750 auto *Arr = ConstantDataArray::get(M.getContext(), Initializer);
751 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
752 GlobalVariable::InternalLinkage, Arr, Name);
753 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
754
755 SmallVector<char, 32> NameBuf;
756 auto SectionName = Section.toStringRef(NameBuf);
757 if (!SectionName.empty())
758 Var->setSection(SectionName);
759 return Var;
760 }
761
762 /// Adds given \p Buf as a global variable into the module.
763 /// \returns Pair of pointers that point at the beginning and the end of the
764 /// variable.
765 std::pair<Constant *, Constant *>
766 addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
767 const Twine &Section = "") {
768 auto *Var = addGlobalArrayVariable(Name, Buf, Section);
769 auto *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
770 getSizetConstPair(0, 0));
771 auto *ImageE = ConstantExpr::getGetElementPtr(
772 Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
773 return std::make_pair(ImageB, ImageE);
774 }
775
776 /// Adds given \p Data as constant byte array in the module.
777 /// \returns Constant pointer to the added data. The pointer type does not
778 /// carry size information.
779 Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
780 auto *Var = addGlobalArrayVariable(Name, Data);
781 auto *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
782 getSizetConstPair(0, 0));
783 return DataPtr;
784 }
785
786 /// Creates a global variable of const char* type and creates an
787 /// initializer that initializes it with \p Str.
788 ///
789 /// \returns Link-time constant pointer (constant expr) to that
790 /// variable.
791 Constant *addStringToModule(StringRef Str, const Twine &Name) {
792 auto *Arr = ConstantDataArray::getString(C, Str);
793 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
794 GlobalVariable::InternalLinkage, Arr, Name);
795 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
796 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
797 Constant *ZeroZero[] = {Zero, Zero};
798 return ConstantExpr::getGetElementPtr(Var->getValueType(), Var, ZeroZero);
799 }
800
801 /// Each image contains its own set of symbols, which may contain different
802 /// symbols than other images. This function constructs an array of
803 /// symbol entries for a particular image.
804 ///
805 /// \returns Pointers to the beginning and end of the array.
806 std::pair<Constant *, Constant *>
807 initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
808 SmallVector<Constant *> EntriesInits;
809 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
810 Entries, /*BufferName*/ "", /*RequiresNullTerminator*/ false);
811 for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
812 GlobalVariable *GV =
813 emitOffloadingEntry(M, /*Kind*/ OffloadKind::OFK_SYCL,
814 Constant::getNullValue(PointerType::getUnqual(C)),
815 /*Name*/ *LI, /*Size*/ 0,
816 /*Flags*/ 0, /*Data*/ 0);
817 EntriesInits.push_back(GV->getInitializer());
818 }
819
820 auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()),
821 EntriesInits);
822 auto *EntriesGV = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
823 GlobalVariable::InternalLinkage, Arr,
824 OffloadKindTag + "entries_arr");
825
826 auto *EntriesB = ConstantExpr::getGetElementPtr(
827 EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
828 auto *EntriesE = ConstantExpr::getGetElementPtr(
829 EntriesGV->getValueType(), EntriesGV,
830 getSizetConstPair(0, EntriesInits.size()));
831 return std::make_pair(EntriesB, EntriesE);
832 }
833
834 Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
835 StringRef OffloadKindTag) {
836 // Note: Intel DPC++ compiler had 2 versions of this structure
837 // and clang++ has a third different structure. To avoid ABI incompatibility
838 // between generated device images the Version here starts from 3.
839 constexpr uint16_t DeviceImageStructVersion = 3;
841 ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
842 Constant *OffloadKindConstant = ConstantInt::get(
843 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
844 Constant *ImageKindConstant = ConstantInt::get(
845 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
846 StringRef Triple = OB.getString("triple");
847 Constant *TripleConstant =
848 addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
849 Constant *CompileOptions =
850 addStringToModule(Options.CompileOptions,
851 Twine(OffloadKindTag) + "opts.compile." + ImageID);
852 Constant *LinkOptions = addStringToModule(
853 Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
854
855 // Note: NULL for now.
856 std::pair<Constant *, Constant *> PropertiesConstants = {
857 Constant::getNullValue(PointerType::getUnqual(C)),
858 Constant::getNullValue(PointerType::getUnqual(C))};
859
860 StringRef RawImage = OB.getImage();
861 std::pair<Constant *, Constant *> Binary = addArrayToModule(
862 ArrayRef<char>(RawImage.begin(), RawImage.end()),
863 Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
864
865 // For SYCL images offload entries are defined here per image.
866 std::pair<Constant *, Constant *> ImageEntriesPtrs =
867 initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
868 Constant *WrappedBinary = ConstantStruct::get(
869 SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
870 TripleConstant, CompileOptions, LinkOptions, Binary.first,
871 Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
872 PropertiesConstants.first, PropertiesConstants.second);
873
874 return WrappedBinary;
875 }
876
877 GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
878 StringRef OffloadKindTag) {
879 auto *ImagesData = ConstantArray::get(
880 ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
881 auto *ImagesGV =
882 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
884 Twine(OffloadKindTag) + "device_images");
885 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
886
887 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
888 Constant *ZeroZero[] = {Zero, Zero};
889 auto *ImagesB = ConstantExpr::getGetElementPtr(ImagesGV->getValueType(),
890 ImagesGV, ZeroZero);
891
892 Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
893 Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
894 static constexpr uint16_t BinDescStructVersion = 1;
895 auto *DescInit = ConstantStruct::get(
896 SyclBinDescTy,
897 ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
898 ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesB,
899 EntriesB, EntriesE);
900
901 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
903 Twine(OffloadKindTag) + "descriptor");
904 }
905
906 /// Creates binary descriptor for the given device images. Binary descriptor
907 /// is an object that is passed to the offloading runtime at program startup
908 /// and it describes all device images available in the executable or shared
909 /// library. It is defined as follows:
910 ///
911 /// \code
912 /// __attribute__((visibility("hidden")))
913 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
914 /// ...
915 /// __attribute__((visibility("hidden")))
916 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
917 ///
918 /// __attribute__((visibility("hidden")))
919 /// extern const char *CompileOptions = "...";
920 /// ...
921 /// __attribute__((visibility("hidden")))
922 /// extern const char *LinkOptions = "...";
923 /// ...
924 ///
925 /// static const char Image0[] = { ... };
926 /// ...
927 /// static const char ImageN[] = { ... };
928 ///
929 /// static const __sycl.tgt_device_image Images[] = {
930 /// {
931 /// Version, // Version
932 /// OffloadKind, // OffloadKind
933 /// Format, // Format of the image.
934 // TripleString, // Arch
935 /// CompileOptions, // CompileOptions
936 /// LinkOptions, // LinkOptions
937 /// Image0, // ImageStart
938 /// Image0 + IMAGE0_SIZE, // ImageEnd
939 /// __sycl_offload_entries_arr0, // EntriesBegin
940 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
941 /// NULL, // PropertiesBegin
942 /// NULL, // PropertiesEnd
943 /// },
944 /// ...
945 /// };
946 ///
947 /// static const __sycl.tgt_bin_desc FatbinDesc = {
948 /// Version, //Version
949 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
950 /// Images, //DeviceImages
951 /// NULL, //HostEntriesBegin
952 /// NULL //HostEntriesEnd
953 /// };
954 /// \endcode
955 ///
956 /// \returns Global variable that represents FatbinDesc.
957 GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
958 StringRef OffloadKindTag = ".sycl_offloading.";
959 SmallVector<Constant *> WrappedImages;
960 WrappedImages.reserve(OffloadFiles.size());
961 for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
962 WrappedImages.push_back(
963 wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
964
965 return combineWrappedImages(WrappedImages, OffloadKindTag);
966 }
967
968 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
969 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
971 Twine("sycl") + ".descriptor_reg", &M);
972 Func->setSection(".text.startup");
973
974 // Get RegFuncName function declaration.
975 auto *RegFuncTy =
976 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
977 /*isVarArg=*/false);
978 FunctionCallee RegFuncC =
979 M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
980
981 // Construct function body
982 IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
983 Builder.CreateCall(RegFuncC, FatbinDesc);
984 Builder.CreateRetVoid();
985
986 // Add this function to constructors.
987 appendToGlobalCtors(M, Func, /*Priority*/ 1);
988 }
989
990 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
991 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
993 "sycl.descriptor_unreg", &M);
994 Func->setSection(".text.startup");
995
996 // Get UnregFuncName function declaration.
997 auto *UnRegFuncTy =
998 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
999 /*isVarArg=*/false);
1000 FunctionCallee UnRegFuncC =
1001 M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
1002
1003 // Construct function body
1004 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
1005 Builder.CreateCall(UnRegFuncC, FatbinDesc);
1006 Builder.CreateRetVoid();
1007
1008 // Add this function to global destructors.
1009 appendToGlobalDtors(M, Func, /*Priority*/ 1);
1010 }
1011}; // end of SYCLWrapper
1012
1013} // namespace
1014
1016 EntryArrayTy EntryArray,
1017 llvm::StringRef Suffix, bool Relocatable) {
1019 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
1020 if (!Desc)
1022 "No binary descriptors created.");
1023 createRegisterFunction(M, Desc, Suffix);
1024 return Error::success();
1025}
1026
1028 EntryArrayTy EntryArray,
1029 llvm::StringRef Suffix,
1030 bool EmitSurfacesAndTextures) {
1031 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix);
1032 if (!Desc)
1034 "No fatbin section created.");
1035
1036 createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix,
1037 EmitSurfacesAndTextures);
1038 return Error::success();
1039}
1040
1042 EntryArrayTy EntryArray, llvm::StringRef Suffix,
1043 bool EmitSurfacesAndTextures) {
1044 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix);
1045 if (!Desc)
1047 "No fatbin section created.");
1048
1049 createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix,
1050 EmitSurfacesAndTextures);
1051 return Error::success();
1052}
1053
1056 SYCLWrapper W(M, Options);
1057 MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
1058 /*Identifier*/ "");
1059 SmallVector<OffloadFile> OffloadFiles;
1060 if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
1061 return E;
1062
1063 GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
1064 if (!Desc)
1066 "No binary descriptors created.");
1067
1068 W.createRegisterFatbinFunction(Desc);
1069 W.createUnregisterFunction(Desc);
1070 return Error::success();
1071}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file defines the SmallVector class.
@ ConstantBit
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
iterator begin() const
Definition ArrayRef.h:131
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
iterator begin() const
Definition StringRef.h:112
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
static LLVM_ABI StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
Definition Type.cpp:739
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition Type.cpp:620
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isMacOSX() const
Is this a Mac OS X triple.
Definition Triple.h:568
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:296
static uint64_t getAlignment()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char SectionName[]
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Definition CoroShape.h:31
LLVM_ABI Error extractOffloadBinaries(MemoryBufferRef Buffer, SmallVectorImpl< OffloadFile > &Binaries)
Extracts embedded device offloading code from a memory Buffer to a list of Binaries.
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
Definition Utility.cpp:86
LLVM_ABI StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
Definition Utility.cpp:26
LLVM_ABI llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::ArrayRef< char > Buffer, SYCLJITOptions Options=SYCLJITOptions())
Wraps OffloadBinaries in the given Buffers into the module M as global symbols and registers the imag...
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
Definition Utility.h:58
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
Definition Utility.h:60
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
Definition Utility.h:66
@ OffloadGlobalEntry
Mark the entry as a global entry.
Definition Utility.h:54
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
Definition Utility.h:56
@ OffloadGlobalExtern
Mark the entry as being extern.
Definition Utility.h:62
@ OffloadGlobalConstant
Mark the entry as being constant.
Definition Utility.h:64
LLVM_ABI llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the O...
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
LLVM_ABI llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the H...
LLVM_ABI llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the...
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:157
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ offload_binary
LLVM offload object file.
Definition Magic.h:58