LLVM 22.0.0git
OffloadWrapper.cpp
Go to the documentation of this file.
1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/ADT/Twine.h"
16#include "llvm/IR/Constants.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/Type.h"
24#include "llvm/Support/Error.h"
30
31#include <memory>
32#include <utility>
33
34using namespace llvm;
35using namespace llvm::object;
36using namespace llvm::offloading;
37
38namespace {
/// Magic number that begins the section containing the CUDA fatbinary.
constexpr unsigned CudaFatMagic = 0x466243b1;
/// Magic number used in place of CudaFatMagic when wrapping a HIP image.
/// Read from the most significant byte down, the value spells "HIPF" in ASCII.
constexpr unsigned HIPFatMagic = 0x48495046;
42
44 return M.getDataLayout().getIntPtrType(M.getContext());
45}
46
47// struct __tgt_device_image {
48// void *ImageStart;
49// void *ImageEnd;
50// __tgt_offload_entry *EntriesBegin;
51// __tgt_offload_entry *EntriesEnd;
52// };
// Returns the struct type named "__tgt_device_image". A type already
// registered under that name in the module's context is reused; otherwise a
// new one is created.
53StructType *getDeviceImageTy(Module &M) {
54 LLVMContext &C = M.getContext();
55 StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image");
56 if (!ImageTy)
57 ImageTy =
58 StructType::create("__tgt_device_image", PointerType::getUnqual(C),
// NOTE(review): listing lines 59-60 (the remainder of the StructType::create
// argument list) are absent from this extract; restore them before compiling.
61 return ImageTy;
62}
63
64PointerType *getDeviceImagePtrTy(Module &M) {
65 return PointerType::getUnqual(M.getContext());
66}
67
68// struct __tgt_bin_desc {
69// int32_t NumDeviceImages;
70// __tgt_device_image *DeviceImages;
71// __tgt_offload_entry *HostEntriesBegin;
72// __tgt_offload_entry *HostEntriesEnd;
73// };
// Returns the struct type named "__tgt_bin_desc". A type already registered
// under that name in the module's context is reused; otherwise a new one is
// created.
74StructType *getBinDescTy(Module &M) {
75 LLVMContext &C = M.getContext();
76 StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc");
77 if (!DescTy)
78 DescTy = StructType::create(
79 "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M),
// NOTE(review): listing line 80 (the remaining StructType::create arguments)
// is absent from this extract; restore it before compiling.
81 return DescTy;
82}
83
84PointerType *getBinDescPtrTy(Module &M) {
85 return PointerType::getUnqual(M.getContext());
86}
87
88/// Creates binary descriptor for the given device images. Binary descriptor
89/// is an object that is passed to the offloading runtime at program startup
90/// and it describes all device images available in the executable or shared
91/// library. It is defined as follows
92///
93/// __attribute__((visibility("hidden")))
94/// extern __tgt_offload_entry *__start_omp_offloading_entries;
95/// __attribute__((visibility("hidden")))
96/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
97///
98/// static const char Image0[] = { <Bufs.front() contents> };
99/// ...
100/// static const char ImageN[] = { <Bufs.back() contents> };
101///
102/// static const __tgt_device_image Images[] = {
103/// {
104/// Image0, /*ImageStart*/
105/// Image0 + sizeof(Image0), /*ImageEnd*/
106/// __start_omp_offloading_entries, /*EntriesBegin*/
107/// __stop_omp_offloading_entries /*EntriesEnd*/
108/// },
109/// ...
110/// {
111/// ImageN, /*ImageStart*/
112/// ImageN + sizeof(ImageN), /*ImageEnd*/
113/// __start_omp_offloading_entries, /*EntriesBegin*/
114/// __stop_omp_offloading_entries /*EntriesEnd*/
115/// }
116/// };
117///
118/// static const __tgt_bin_desc BinDesc = {
119/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
120/// Images, /*DeviceImages*/
121/// __start_omp_offloading_entries, /*HostEntriesBegin*/
122/// __stop_omp_offloading_entries /*HostEntriesEnd*/
123/// };
124///
125/// Global variable that represents BinDesc is returned.
// NOTE(review): this extract omits listing lines 142, 144, 147, 158, 189 and
// 203, so the GlobalVariable constructions below and the scope closed at
// listing line 167 appear truncated. Restore them before compiling.
126GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
127 EntryArrayTy EntryArray, StringRef Suffix,
128 bool Relocatable) {
129 LLVMContext &C = M.getContext();
130 auto [EntriesB, EntriesE] = EntryArray;
131
132 auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
133 Constant *ZeroZero[] = {Zero, Zero};
134
135 // Create initializer for the images array.
136 SmallVector<Constant *, 4u> ImagesInits;
137 ImagesInits.reserve(Bufs.size());
138 for (ArrayRef<char> Buf : Bufs) {
139 // We embed the full offloading entry so the binary utilities can parse it.
140 auto *Data = ConstantDataArray::get(C, Buf);
141 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
143 ".omp_offloading.device_image" + Suffix);
// The section name depends only on the Relocatable flag.
145 Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
146 : ".llvm.offloading");
148
149 StringRef Binary(Buf.data(), Buf.size());
150
// By default the image spans the whole buffer.
151 uint64_t BeginOffset = 0;
152 uint64_t EndOffset = Binary.size();
153
154 // Optionally use an offload binary for its offload dumping support.
155 // The device image struct contains the pointer to the beginning and end of
156 // the image stored inside of the offload binary. There should only be one
157 // of these for each buffer so we parse it out manually.
// NOTE(review): the statement that opens this scope (listing line 158) is not
// visible here, so the condition guarding the header parse cannot be confirmed.
159 const auto *Header =
160 reinterpret_cast<const object::OffloadBinary::Header *>(
161 Binary.bytes_begin());
162 const auto *Entry =
163 reinterpret_cast<const object::OffloadBinary::Entry *>(
164 Binary.bytes_begin() + Header->EntryOffset);
165 BeginOffset = Entry->ImageOffset;
166 EndOffset = Entry->ImageOffset + Entry->ImageSize;
167 }
168
// Despite its name, Size holds the end offset of the image within the buffer.
169 auto *Begin = ConstantInt::get(getSizeTTy(M), BeginOffset);
170 auto *Size = ConstantInt::get(getSizeTTy(M), EndOffset);
171 Constant *ZeroBegin[] = {Zero, Begin};
172 Constant *ZeroSize[] = {Zero, Size};
173
174 auto *ImageB =
175 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin);
176 auto *ImageE =
177 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
178
179 ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
180 ImageE, EntriesB, EntriesE));
181 }
182
183 // Then create images array.
184 auto *ImagesData = ConstantArray::get(
185 ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits);
186
187 auto *Images =
188 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
190 ".omp_offloading.device_images" + Suffix);
191 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
192
193 auto *ImagesB =
194 ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero);
195
196 // And finally create the binary descriptor object.
197 auto *DescInit = ConstantStruct::get(
198 getBinDescTy(M),
199 ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
200 EntriesB, EntriesE);
201
202 return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
204 ".omp_offloading.descriptor" + Suffix);
205}
206
// Builds a void() function named ".omp_offloading.descriptor_unreg" + Suffix
// whose body calls __tgt_unregister_lib(BinDesc), and returns it.
// NOTE(review): listing line 212 (the start of the expression that creates
// Func) is absent from this extract, so the linkage used cannot be confirmed.
207Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
208 StringRef Suffix) {
209 LLVMContext &C = M.getContext();
210 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
211 auto *Func =
213 ".omp_offloading.descriptor_unreg" + Suffix, &M);
214 Func->setSection(".text.startup");
215
216 // Get __tgt_unregister_lib function declaration.
217 auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
218 /*isVarArg*/ false);
219 FunctionCallee UnRegFuncC =
220 M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
221
222 // Construct function body
223 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
224 Builder.CreateCall(UnRegFuncC, BinDesc);
225 Builder.CreateRetVoid();
226
227 return Func;
228}
229
// Builds the function ".omp_offloading.descriptor_reg" + Suffix, which calls
// __tgt_register_lib(BinDesc) and then hands the matching unregister function
// to atexit. The function is appended to the module's global constructors
// with priority 101.
// NOTE(review): listing line 234 (the declaration of Func) is absent from
// this extract; Func is used below without a visible definition.
230void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
231 StringRef Suffix) {
232 LLVMContext &C = M.getContext();
233 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
235 ".omp_offloading.descriptor_reg" + Suffix, &M);
236 Func->setSection(".text.startup");
237
238 // Get __tgt_register_lib function declaration.
239 auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
240 /*isVarArg*/ false);
241 FunctionCallee RegFuncC =
242 M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
243
// Declare int atexit(ptr) so the unregister function can be scheduled.
244 auto *AtExitTy = FunctionType::get(
245 Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
246 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
247
248 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
249
250 // Construct function body
251 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
252
253 Builder.CreateCall(RegFuncC, BinDesc);
254
255 // Register the destructors with 'atexit'. This is expected by the CUDA
256 // runtime and ensures that we clean up before dynamic objects are destroyed.
257 // This needs to be done after plugin initialization to ensure that it is
258 // called before the plugin runtime is destroyed.
259 Builder.CreateCall(AtExit, UnregFunc);
260 Builder.CreateRetVoid();
261
262 // Add this function to constructors.
263 appendToGlobalCtors(M, Func, /*Priority=*/101);
264}
265
266// struct fatbin_wrapper {
267// int32_t magic;
268// int32_t version;
269// void *image;
270// void *reserved;
271// };
// Returns the struct type named "fatbin_wrapper". A type already registered
// under that name in the module's context is reused; otherwise a new one is
// created.
272StructType *getFatbinWrapperTy(Module &M) {
273 LLVMContext &C = M.getContext();
274 StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper");
275 if (!FatbinTy)
276 FatbinTy = StructType::create(
277 "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C),
// NOTE(review): listing line 278 (the remaining StructType::create arguments)
// is absent from this extract; restore it before compiling.
279 return FatbinTy;
280}
281
282/// Embed the image \p Image into the module \p M so it can be found by the
283/// runtime.
///
/// Two globals are created: ".fatbin_image" + Suffix holding the raw bytes,
/// and ".fatbin_wrapper" + Suffix holding the fatbin_wrapper struct, which is
/// the value returned. Section names are chosen from \p IsHIP and, for CUDA,
/// from whether the module's target triple is macOS.
// NOTE(review): this extract omits listing lines 296 and 307-308, so the
// Fatbin GlobalVariable construction and the tail of the FatbinWrapper
// initializer list (image / reserved fields) are not visible. Int8PtrTy has
// no visible use here; presumably it is referenced on those missing lines.
284GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
285 StringRef Suffix) {
286 LLVMContext &C = M.getContext();
287 llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
288 const llvm::Triple &Triple = M.getTargetTriple();
289
290 // Create the global string containing the fatbinary.
291 StringRef FatbinConstantSection =
292 IsHIP ? ".hip_fatbin"
293 : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
294 auto *Data = ConstantDataArray::get(C, Image);
295 auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
297 ".fatbin_image" + Suffix);
298 Fatbin->setSection(FatbinConstantSection);
299
300 // Create the fatbinary wrapper
301 StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
302 : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
303 : ".nvFatBinSegment";
// First two wrapper fields: the magic for the selected runtime and version 1.
304 Constant *FatbinWrapper[] = {
305 ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
306 ConstantInt::get(Type::getInt32Ty(C), 1),
309
310 Constant *FatbinInitializer =
311 ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper);
312
313 auto *FatbinDesc =
314 new GlobalVariable(M, getFatbinWrapperTy(M),
315 /*isConstant*/ true, GlobalValue::InternalLinkage,
316 FatbinInitializer, ".fatbin_wrapper" + Suffix);
317 FatbinDesc->setSection(FatbinWrapperSection);
318 FatbinDesc->setAlignment(Align(8));
319
320 return FatbinDesc;
321}
322
323/// Create the register globals function. We will iterate all of the offloading
324/// entries stored at the begin / end symbols and register them according to
325/// their type. This creates the following function in IR:
326///
327/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
328/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
329///
330/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
331/// void *, void *, void *, void *, int *);
332/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
333/// int64_t, int32_t, int32_t);
334///
335/// void __cudaRegisterTest(void **fatbinHandle) {
336/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
337/// entry != &__stop_cuda_offloading_entries; ++entry) {
338/// if (entry->Kind != OFK_CUDA)
339/// continue;
340///
341/// if (!entry->Size)
342/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
343/// entry->name, -1, 0, 0, 0, 0, 0);
344/// else
345/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
346/// 0, entry->size, 0, 0);
347/// }
348/// }
///
/// The sketch above is simplified: the generated code dispatches non-kernel
/// entries through a switch on the low three bits of the flags field, with
/// separate cases for global, managed, surface and texture entries. The
/// surface and texture calls are emitted only when \p EmitSurfacesAndTextures
/// is set. With \p IsHIP the __hip* runtime entry points are used instead of
/// the __cuda* ones.
// NOTE(review): this extract omits listing lines 361, 370, 372, 379, 381,
// 389, 391, 398, 400, 408, 465, 472, 477, 482, 487-488, 492, 547 and 552, so
// several FunctionType::get calls, the flag masks, the kind/size comparisons
// and the loop-bound expressions below appear truncated. Restore them before
// compiling.
349Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
350 EntryArrayTy EntryArray,
351 StringRef Suffix,
352 bool EmitSurfacesAndTextures) {
353 LLVMContext &C = M.getContext();
354 auto [EntriesB, EntriesE] = EntryArray;
355
356 // Get the __cudaRegisterFunction function declaration.
// All three pointer aliases are created by the same PointerType::get(C, 0)
// call; the distinct names only document the intended pointee.
357 PointerType *Int8PtrTy = PointerType::get(C, 0);
358 PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
359 PointerType *Int32PtrTy = PointerType::get(C, 0);
360 auto *RegFuncTy = FunctionType::get(
362 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
363 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
364 /*isVarArg*/ false);
365 FunctionCallee RegFunc = M.getOrInsertFunction(
366 IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
367
368 // Get the __cudaRegisterVar function declaration.
369 auto *RegVarTy = FunctionType::get(
371 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
373 /*isVarArg*/ false);
374 FunctionCallee RegVar = M.getOrInsertFunction(
375 IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
376
377 // Get the __cudaRegisterManagedVar function declaration.
378 FunctionType *RegManagedVarTy =
380 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
382 /*isVarArg=*/false);
383 FunctionCallee RegManagedVar = M.getOrInsertFunction(
384 IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
385 RegManagedVarTy);
386
387 // Get the __cudaRegisterSurface function declaration.
388 FunctionType *RegSurfaceTy =
390 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
392 /*isVarArg=*/false);
393 FunctionCallee RegSurface = M.getOrInsertFunction(
394 IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);
395
396 // Get the __cudaRegisterTexture function declaration.
397 FunctionType *RegTextureTy = FunctionType::get(
399 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
401 /*isVarArg=*/false);
402 FunctionCallee RegTexture = M.getOrInsertFunction(
403 IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);
404
405 auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
406 /*isVarArg*/ false);
407 auto *RegGlobalsFn =
409 IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
410 RegGlobalsFn->setSection(".text.startup");
411
412 // Create the loop to register all the entries.
413 IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
414 auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
415 auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn);
416 auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
417 auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
418 auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
419 auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
420 auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
421 auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
422 auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
423 auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);
424
// Skip the loop entirely when the entry range is empty.
425 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
426 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
427 Builder.SetInsertPoint(EntryBB);
// Loop header: Entry is the PHI carrying the current entry pointer. The loads
// below read individual fields of the offload entry struct by field index.
428 auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
429 auto *AddrPtr =
430 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
431 {ConstantInt::get(Type::getInt32Ty(C), 0),
432 ConstantInt::get(Type::getInt32Ty(C), 4)});
433 auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
434 auto *AuxAddrPtr =
435 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
436 {ConstantInt::get(Type::getInt32Ty(C), 0),
437 ConstantInt::get(Type::getInt32Ty(C), 8)});
438 auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr");
439 auto *KindPtr =
440 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
441 {ConstantInt::get(Type::getInt32Ty(C), 0),
442 ConstantInt::get(Type::getInt32Ty(C), 2)});
443 auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind");
444 auto *NamePtr =
445 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
446 {ConstantInt::get(Type::getInt32Ty(C), 0),
447 ConstantInt::get(Type::getInt32Ty(C), 5)});
448 auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
449 auto *SizePtr =
450 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
451 {ConstantInt::get(Type::getInt32Ty(C), 0),
452 ConstantInt::get(Type::getInt32Ty(C), 6)});
453 auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size");
454 auto *FlagsPtr =
455 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
456 {ConstantInt::get(Type::getInt32Ty(C), 0),
457 ConstantInt::get(Type::getInt32Ty(C), 3)});
458 auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
459 auto *DataPtr =
460 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
461 {ConstantInt::get(Type::getInt32Ty(C), 0),
462 ConstantInt::get(Type::getInt32Ty(C), 7)});
463 auto *Data = Builder.CreateTrunc(
464 Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"),
// The low three bits of the flags select the entry type used by the switch.
466 auto *Type = Builder.CreateAnd(
467 Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");
468
469 // Extract the flags stored in the bit-field and convert them to C booleans.
470 auto *ExternBit = Builder.CreateAnd(
471 Flags, ConstantInt::get(Type::getInt32Ty(C),
473 auto *Extern = Builder.CreateLShr(
474 ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
475 auto *ConstantBit = Builder.CreateAnd(
476 Flags, ConstantInt::get(Type::getInt32Ty(C),
478 auto *Const = Builder.CreateLShr(
479 ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
480 auto *NormalizedBit = Builder.CreateAnd(
481 Flags, ConstantInt::get(Type::getInt32Ty(C),
483 auto *Normalized = Builder.CreateLShr(
484 NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
485 auto *KindCond = Builder.CreateICmpEQ(
486 Kind, ConstantInt::get(Type::getInt16Ty(C),
489 Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
490 Builder.SetInsertPoint(IfKindBB);
491 auto *FnCond = Builder.CreateICmpEQ(
493 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
494
495 // Create kernel registration code.
496 Builder.SetInsertPoint(IfThenBB);
497 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
498 ConstantInt::get(Type::getInt32Ty(C), -1),
499 ConstantPointerNull::get(Int8PtrTy),
500 ConstantPointerNull::get(Int8PtrTy),
501 ConstantPointerNull::get(Int8PtrTy),
502 ConstantPointerNull::get(Int8PtrTy),
503 ConstantPointerNull::get(Int32PtrTy)});
504 Builder.CreateBr(IfEndBB);
505 Builder.SetInsertPoint(IfElseBB);
506
// Entry types without a case below fall through to IfEndBB and are skipped.
507 auto *Switch = Builder.CreateSwitch(Type, IfEndBB);
508 // Create global variable registration code.
509 Builder.SetInsertPoint(SwGlobalBB);
510 Builder.CreateCall(RegVar,
511 {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
512 Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
513 Builder.CreateBr(IfEndBB);
514 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
515 SwGlobalBB);
516
517 // Create managed variable registration code.
518 Builder.SetInsertPoint(SwManagedBB);
519 Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
520 Name, Size, Data});
521 Builder.CreateBr(IfEndBB);
522 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
523 SwManagedBB);
524 // Create surface variable registration code.
// The case block is always created; only the runtime call is conditional.
525 Builder.SetInsertPoint(SwSurfaceBB);
526 if (EmitSurfacesAndTextures)
527 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
528 Data, Extern});
529 Builder.CreateBr(IfEndBB);
530 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
531 SwSurfaceBB);
532
533 // Create texture variable registration code.
534 Builder.SetInsertPoint(SwTextureBB);
535 if (EmitSurfacesAndTextures)
536 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
537 Data, Normalized, Extern});
538 Builder.CreateBr(IfEndBB);
539 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
540 SwTextureBB);
541
// Loop latch: advance to the next entry and exit once the end is reached.
542 Builder.SetInsertPoint(IfEndBB);
543 auto *NewEntry = Builder.CreateInBoundsGEP(
544 offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
545 auto *Cmp = Builder.CreateICmpEQ(
546 NewEntry,
548 ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
549 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
550 ConstantInt::get(getSizeTTy(M), 0)})));
551 Entry->addIncoming(
553 ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
554 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
555 ConstantInt::get(getSizeTTy(M), 0)})),
556 &RegGlobalsFn->getEntryBlock());
557 Entry->addIncoming(NewEntry, IfEndBB);
558 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
559 Builder.SetInsertPoint(ExitBB);
560 Builder.CreateRetVoid();
561
562 return RegGlobalsFn;
563}
564
565// Create the constructor and destructor to register the fatbinary with the CUDA
566// runtime.
// The constructor registers the fatbinary, stores the returned handle in an
// internal global, calls the register-globals function with that handle and
// schedules the destructor through atexit. It is appended to the module's
// global constructors with priority 101. The destructor reloads the handle
// and passes it to the unregister entry point.
// NOTE(review): this extract omits listing lines 574, 580, 608 and 615, so
// the Function::Create argument lists, the BinaryHandleGlobal initializer and
// the argument of the RegFatbin call are not visible.
567void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
568 bool IsHIP, EntryArrayTy EntryArray,
569 StringRef Suffix,
570 bool EmitSurfacesAndTextures) {
571 LLVMContext &C = M.getContext();
572 auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
573 auto *CtorFunc = Function::Create(
575 (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
576 CtorFunc->setSection(".text.startup");
577
578 auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
579 auto *DtorFunc = Function::Create(
581 (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
582 DtorFunc->setSection(".text.startup");
583
584 auto *PtrTy = PointerType::getUnqual(C);
585
586 // Get the __cudaRegisterFatBinary function declaration.
587 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false);
588 FunctionCallee RegFatbin = M.getOrInsertFunction(
589 IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
590 // Get the __cudaRegisterFatBinaryEnd function declaration.
// NOTE(review): this declaration is inserted even when IsHIP is true,
// although the call below is only emitted for the non-HIP case.
591 auto *RegFatEndTy =
592 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
593 FunctionCallee RegFatbinEnd =
594 M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy);
595 // Get the __cudaUnregisterFatBinary function declaration.
596 auto *UnregFatTy =
597 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
598 FunctionCallee UnregFatbin = M.getOrInsertFunction(
599 IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
600 UnregFatTy);
601
602 auto *AtExitTy =
603 FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false);
604 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
605
// Mutable internal global that carries the handle from constructor to
// destructor.
606 auto *BinaryHandleGlobal = new llvm::GlobalVariable(
607 M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
609 (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
610
611 // Create the constructor to register this image with the runtime.
612 IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
613 CallInst *Handle = CtorBuilder.CreateCall(
614 RegFatbin,
// The store is aligned to the size of a pointer in the module's data layout.
616 CtorBuilder.CreateAlignedStore(
617 Handle, BinaryHandleGlobal,
618 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
619 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
620 Suffix,
621 EmitSurfacesAndTextures),
622 Handle);
623 if (!IsHIP)
624 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
625 CtorBuilder.CreateCall(AtExit, DtorFunc);
626 CtorBuilder.CreateRetVoid();
627
628 // Create the destructor to unregister the image with the runtime. We cannot
629 // use a standard global destructor after CUDA 9.2 so this must be called by
630 // `atexit()` instead.
631 IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc));
632 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
633 PtrTy, BinaryHandleGlobal,
634 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
635 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
636 DtorBuilder.CreateRetVoid();
637
638 // Add this function to constructors.
639 appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
640}
641
642/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
643class SYCLWrapper {
644public:
645 SYCLWrapper(Module &M, const SYCLJITOptions &Options)
646 : M(M), C(M.getContext()), Options(Options) {
647 EntryTy = offloading::getEntryTy(M);
648 SyclDeviceImageTy = getSyclDeviceImageTy();
649 SyclBinDescTy = getSyclBinDescTy();
650 }
651
652 /// Creates binary descriptor for the given device images. Binary descriptor
653 /// is an object that is passed to the offloading runtime at program startup
654 /// and it describes all device images available in the executable or shared
655 /// library. It is defined as follows:
656 ///
657 /// \code
658 /// __attribute__((visibility("hidden")))
659 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
660 /// ...
661 /// __attribute__((visibility("hidden")))
662 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
663 ///
664 /// __attribute__((visibility("hidden")))
665 /// extern const char *CompileOptions = "...";
666 /// ...
667 /// __attribute__((visibility("hidden")))
668 /// extern const char *LinkOptions = "...";
669 /// ...
670 ///
671 /// static const char Image0[] = { ... };
672 /// ...
673 /// static const char ImageN[] = { ... };
674 ///
675 /// static const __sycl.tgt_device_image Images[] = {
676 /// {
677 /// Version, // Version
678 /// OffloadKind, // OffloadKind
679 /// Format, // Format of the image.
680 // TripleString, // Arch
681 /// CompileOptions, // CompileOptions
682 /// LinkOptions, // LinkOptions
683 /// Image0, // ImageStart
684 /// Image0 + IMAGE0_SIZE, // ImageEnd
685 /// __sycl_offload_entries_arr0, // EntriesBegin
686 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
687 /// NULL, // PropertiesBegin
688 /// NULL, // PropertiesEnd
689 /// },
690 /// ...
691 /// };
692 ///
693 /// static const __sycl.tgt_bin_desc FatbinDesc = {
694 /// Version, //Version
695 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
696 /// Images, //DeviceImages
697 /// NULL, //HostEntriesBegin
698 /// NULL //HostEntriesEnd
699 /// };
700 /// \endcode
701 ///
702 /// \returns Global variable that represents FatbinDesc.
703 GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
704 StringRef OffloadKindTag = ".sycl_offloading.";
705 SmallVector<Constant *> WrappedImages;
706 WrappedImages.reserve(OffloadFiles.size());
707 for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
708 WrappedImages.push_back(
709 wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
710
711 return combineWrappedImages(WrappedImages, OffloadKindTag);
712 }
713
// Emits the function "sycl.descriptor_reg", whose body calls
// __sycl_register_lib(FatbinDesc), and appends it to the module's global
// constructors with priority 1.
// NOTE(review): listing line 717 (the declaration of Func) is absent from
// this extract; Func is used below without a visible definition.
714 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
715 FunctionType *FuncTy =
716 FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
718 Twine("sycl") + ".descriptor_reg", &M);
719 Func->setSection(".text.startup");
720
721 // Get RegFuncName function declaration.
722 FunctionType *RegFuncTy =
723 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
724 /*isVarArg=*/false);
725 FunctionCallee RegFuncC =
726 M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
727
728 // Construct function body.
729 IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
730 Builder.CreateCall(RegFuncC, FatbinDesc);
731 Builder.CreateRetVoid();
732
733 // Add this function to constructors.
734 appendToGlobalCtors(M, Func, /*Priority*/ 1);
735 }
736
// Emits the function "sycl.descriptor_unreg", whose body calls
// __sycl_unregister_lib(FatbinDesc), and appends it to the module's global
// destructors with priority 1.
// NOTE(review): listing line 740 (the declaration of Func) is absent from
// this extract; Func is used below without a visible definition.
737 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
738 FunctionType *FuncTy =
739 FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
741 "sycl.descriptor_unreg", &M);
742 Func->setSection(".text.startup");
743
744 // Get UnregFuncName function declaration.
745 FunctionType *UnRegFuncTy =
746 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
747 /*isVarArg=*/false);
748 FunctionCallee UnRegFuncC =
749 M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
750
751 // Construct function body
752 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
753 Builder.CreateCall(UnRegFuncC, FatbinDesc);
754 Builder.CreateRetVoid();
755
756 // Add this function to global destructors.
757 appendToGlobalDtors(M, Func, /*Priority*/ 1);
758 }
759
760private:
761 IntegerType *getSizeTTy() {
762 switch (M.getDataLayout().getPointerSize()) {
763 case 4:
764 return Type::getInt32Ty(C);
765 case 8:
766 return Type::getInt64Ty(C);
767 }
768 llvm_unreachable("unsupported pointer type size");
769 }
770
771 SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
772 IntegerType *SizeTTy = getSizeTTy();
773 return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
774 ConstantInt::get(SizeTTy, Second)};
775 }
776
777 /// Note: Properties aren't supported and the support is going
778 /// to be added later.
779 /// Creates a structure corresponding to:
780 /// SYCL specific image descriptor type.
781 /// \code
782 /// struct __sycl.tgt_device_image {
783 /// // Version of this structure - for backward compatibility;
784 /// // all modifications which change order/type/offsets of existing fields
785 /// // should increment the version.
786 /// uint16_t Version;
787 /// // The kind of offload model the image employs.
788 /// uint8_t OffloadKind;
789 /// // Format of the image data - SPIRV, LLVMIR bitcode, etc.
790 /// uint8_t Format;
791 /// // Null-terminated string representation of the device's target
792 /// // architecture.
793 /// const char *Arch;
794 /// // A null-terminated string; target- and compiler-specific options
795 /// // which are passed to the device compiler at runtime.
796 /// const char *CompileOptions;
797 /// // A null-terminated string; target- and compiler-specific options
798 /// // which are passed to the device linker at runtime.
799 /// const char *LinkOptions;
800 /// // Pointer to the device binary image start.
801 /// void *ImageStart;
802 /// // Pointer to the device binary image end.
803 /// void *ImageEnd;
804 /// // The entry table.
805 /// __tgt_offload_entry *EntriesBegin;
806 /// __tgt_offload_entry *EntriesEnd;
807 /// const char *PropertiesBegin;
808 /// const char *PropertiesEnd;
809 /// };
810 /// \endcode
811 StructType *getSyclDeviceImageTy() {
812 return StructType::create(
813 {
814 Type::getInt16Ty(C), // Version
815 Type::getInt8Ty(C), // OffloadKind
816 Type::getInt8Ty(C), // Format
817 PointerType::getUnqual(C), // Arch
818 PointerType::getUnqual(C), // CompileOptions
819 PointerType::getUnqual(C), // LinkOptions
820 PointerType::getUnqual(C), // ImageStart
821 PointerType::getUnqual(C), // ImageEnd
822 PointerType::getUnqual(C), // EntriesBegin
823 PointerType::getUnqual(C), // EntriesEnd
824 PointerType::getUnqual(C), // PropertiesBegin
825 PointerType::getUnqual(C) // PropertiesEnd
826 },
827 "__sycl.tgt_device_image");
828 }
829
830 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
831 /// to:
832 ///
833 /// \code
834 /// struct __sycl.tgt_bin_desc {
835 /// // version of this structure - for backward compatibility;
836 /// // all modifications which change order/type/offsets of existing fields
837 /// // should increment the version.
838 /// uint16_t Version;
839 /// uint16_t NumDeviceImages;
840 /// __sycl.tgt_device_image *DeviceImages;
841 /// // the offload entry table
842 /// __tgt_offload_entry *HostEntriesBegin;
843 /// __tgt_offload_entry *HostEntriesEnd;
844 /// };
845 /// \endcode
846 StructType *getSyclBinDescTy() {
847 return StructType::create(
848 {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
849 PointerType::getUnqual(C), PointerType::getUnqual(C)},
850 "__sycl.tgt_bin_desc");
851 }
852
853 /// Adds a global readonly variable that is initialized by given
854 /// \p Initializer to the module.
855 GlobalVariable *addGlobalArrayVariable(const Twine &Name,
856 ArrayRef<char> Initializer,
857 const Twine &Section = "") {
858 Constant *Arr = ConstantDataArray::get(M.getContext(), Initializer);
859 GlobalVariable *Var =
860 new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
861 GlobalVariable::InternalLinkage, Arr, Name);
862 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
863
864 SmallVector<char, 32> NameBuf;
865 StringRef SectionName = Section.toStringRef(NameBuf);
866 if (!SectionName.empty())
867 Var->setSection(SectionName);
868 return Var;
869 }
870
871 /// Adds given \p Buf as a global variable into the module.
872 /// \returns Pair of pointers that point at the beginning and the end of the
873 /// variable.
874 std::pair<Constant *, Constant *>
875 addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
876 const Twine &Section = "") {
877 GlobalVariable *Var = addGlobalArrayVariable(Name, Buf, Section);
879 getSizetConstPair(0, 0));
881 Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
882 return std::make_pair(ImageB, ImageE);
883 }
884
885 /// Adds given \p Data as constant byte array in the module.
886 /// \returns Constant pointer to the added data. The pointer type does not
887 /// carry size information.
888 Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
889 GlobalVariable *Var = addGlobalArrayVariable(Name, Data);
891 getSizetConstPair(0, 0));
892 return DataPtr;
893 }
894
895 /// Creates a global variable of const char* type and creates an
896 /// initializer that initializes it with \p Str.
897 ///
898 /// \returns Link-time constant pointer (constant expr) to that
899 /// variable.
900 Constant *addStringToModule(StringRef Str, const Twine &Name) {
902 GlobalVariable *Var =
903 new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
904 GlobalVariable::InternalLinkage, Arr, Name);
905 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
906 ConstantInt *Zero = ConstantInt::get(getSizeTTy(), 0);
907 Constant *ZeroZero[] = {Zero, Zero};
908 return ConstantExpr::getGetElementPtr(Var->getValueType(), Var, ZeroZero);
909 }
910
911 /// Each image contains its own set of symbols, which may contain different
912 /// symbols than other images. This function constructs an array of
913 /// symbol entries for a particular image.
914 ///
915 /// \returns Pointers to the beginning and end of the array.
916 std::pair<Constant *, Constant *>
917 initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
918 SmallVector<Constant *> EntriesInits;
919 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
920 Entries, /*BufferName*/ "", /*RequiresNullTerminator*/ false);
921 for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
922 GlobalVariable *GV =
923 emitOffloadingEntry(M, /*Kind*/ OffloadKind::OFK_SYCL,
924 Constant::getNullValue(PointerType::getUnqual(C)),
925 /*Name*/ *LI, /*Size*/ 0,
926 /*Flags*/ 0, /*Data*/ 0);
927 EntriesInits.push_back(GV->getInitializer());
928 }
929
931 ArrayType::get(EntryTy, EntriesInits.size()), EntriesInits);
932 GlobalVariable *EntriesGV = new GlobalVariable(
933 M, Arr->getType(), /*isConstant*/ true, GlobalVariable::InternalLinkage,
934 Arr, OffloadKindTag + "entries_arr");
935
937 EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
939 EntriesGV->getValueType(), EntriesGV,
940 getSizetConstPair(0, EntriesInits.size()));
941 return std::make_pair(EntriesB, EntriesE);
942 }
943
944 Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
945 StringRef OffloadKindTag) {
946 // Note: Intel DPC++ compiler had 2 versions of this structure
947 // and clang++ has a third different structure. To avoid ABI incompatibility
948 // between generated device images the Version here starts from 3.
949 constexpr uint16_t DeviceImageStructVersion = 3;
951 ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
952 Constant *OffloadKindConstant = ConstantInt::get(
953 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
954 Constant *ImageKindConstant = ConstantInt::get(
955 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
956 StringRef Triple = OB.getString("triple");
957 Constant *TripleConstant =
958 addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
959 Constant *CompileOptions =
960 addStringToModule(Options.CompileOptions,
961 Twine(OffloadKindTag) + "opts.compile." + ImageID);
962 Constant *LinkOptions = addStringToModule(
963 Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
964
965 // Note: NULL for now.
966 std::pair<Constant *, Constant *> PropertiesConstants = {
967 Constant::getNullValue(PointerType::getUnqual(C)),
968 Constant::getNullValue(PointerType::getUnqual(C))};
969
970 StringRef RawImage = OB.getImage();
971 std::pair<Constant *, Constant *> Binary = addArrayToModule(
972 ArrayRef<char>(RawImage.begin(), RawImage.end()),
973 Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
974
975 // For SYCL images offload entries are defined here per image.
976 std::pair<Constant *, Constant *> ImageEntriesPtrs =
977 initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
978
979 // .first and .second arguments below correspond to start and end pointers
980 // respectively.
981 Constant *WrappedBinary = ConstantStruct::get(
982 SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
983 TripleConstant, CompileOptions, LinkOptions, Binary.first,
984 Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
985 PropertiesConstants.first, PropertiesConstants.second);
986
987 return WrappedBinary;
988 }
989
990 GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
991 StringRef OffloadKindTag) {
992 Constant *ImagesData = ConstantArray::get(
993 ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
994 GlobalVariable *ImagesGV =
995 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
997 Twine(OffloadKindTag) + "device_images");
998 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
999
1000 ConstantInt *Zero = ConstantInt::get(getSizeTTy(), 0);
1001 Constant *ZeroZero[] = {Zero, Zero};
1003 ImagesGV, ZeroZero);
1004
1005 Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
1006 Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
1007 static constexpr uint16_t BinDescStructVersion = 1;
1008 Constant *DescInit = ConstantStruct::get(
1009 SyclBinDescTy,
1010 ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
1011 ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesB,
1012 EntriesB, EntriesE);
1013
1014 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
1016 Twine(OffloadKindTag) + "descriptor");
1017 }
1018
1019 Module &M;
1020 LLVMContext &C;
1021 SYCLJITOptions Options;
1022
1023 StructType *EntryTy = nullptr;
1024 StructType *SyclDeviceImageTy = nullptr;
1025 StructType *SyclBinDescTy = nullptr;
1026}; // end of SYCLWrapper
1027
1028} // namespace
1029
1031 EntryArrayTy EntryArray,
1032 llvm::StringRef Suffix, bool Relocatable) {
1034 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
1035 if (!Desc)
1037 "No binary descriptors created.");
1038 createRegisterFunction(M, Desc, Suffix);
1039 return Error::success();
1040}
1041
1043 EntryArrayTy EntryArray,
1044 llvm::StringRef Suffix,
1045 bool EmitSurfacesAndTextures) {
1046 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix);
1047 if (!Desc)
1049 "No fatbin section created.");
1050
1051 createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix,
1052 EmitSurfacesAndTextures);
1053 return Error::success();
1054}
1055
1057 EntryArrayTy EntryArray, llvm::StringRef Suffix,
1058 bool EmitSurfacesAndTextures) {
1059 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix);
1060 if (!Desc)
1062 "No fatbin section created.");
1063
1064 createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix,
1065 EmitSurfacesAndTextures);
1066 return Error::success();
1067}
1068
1071 SYCLWrapper W(M, Options);
1072 MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
1073 /*Identifier*/ "");
1074 SmallVector<OffloadFile> OffloadFiles;
1075 if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
1076 return E;
1077
1078 GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
1079 if (!Desc)
1081 "No binary descriptors created.");
1082
1083 W.createRegisterFatbinFunction(Desc);
1084 W.createUnregisterFunction(Desc);
1085 return Error::success();
1086}
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file defines the SmallVector class.
@ ConstantBit
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1306
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1279
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
LLVM_ABI void setSection(StringRef S)
Change the section for this global.
Definition Globals.cpp:275
void setUnnamedAddr(UnnamedAddr Val)
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
iterator begin() const
Definition StringRef.h:112
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
static LLVM_ABI StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
Definition Type.cpp:738
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition Type.cpp:619
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isMacOSX() const
Is this a Mac OS X triple.
Definition Triple.h:582
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static uint64_t getAlignment()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char SectionName[]
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Definition CoroShape.h:31
LLVM_ABI Error extractOffloadBinaries(MemoryBufferRef Buffer, SmallVectorImpl< OffloadFile > &Binaries)
Extracts embedded device offloading code from a memory Buffer to a list of Binaries.
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
Definition Utility.cpp:86
LLVM_ABI StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
Definition Utility.cpp:26
LLVM_ABI llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::ArrayRef< char > Buffer, SYCLJITOptions Options=SYCLJITOptions())
Wraps OffloadBinaries in the given Buffers into the module M as global symbols and registers the imag...
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
Definition Utility.h:58
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
Definition Utility.h:60
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
Definition Utility.h:66
@ OffloadGlobalEntry
Mark the entry as a global entry.
Definition Utility.h:54
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
Definition Utility.h:56
@ OffloadGlobalExtern
Mark the entry as being extern.
Definition Utility.h:62
@ OffloadGlobalConstant
Mark the entry as being constant.
Definition Utility.h:64
LLVM_ABI llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the O...
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
LLVM_ABI llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the H...
LLVM_ABI llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the...
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:163
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ offload_binary
LLVM offload object file.
Definition Magic.h:58