LLVM 22.0.0git
OffloadWrapper.cpp
Go to the documentation of this file.
1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/ADT/Twine.h"
16#include "llvm/IR/Constants.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/Type.h"
24#include "llvm/Support/Error.h"
30
31#include <memory>
32#include <string>
33#include <utility>
34
35using namespace llvm;
36using namespace llvm::object;
37using namespace llvm::offloading;
38
39namespace {
40/// Magic number that begins the section containing the CUDA fatbinary.
41constexpr unsigned CudaFatMagic = 0x466243b1;
/// Magic number written into the fatbinary wrapper when wrapping for HIP (see
/// createFatbinDesc). The bytes 0x48 0x49 0x50 0x46 spell "HIPF" in ASCII.
42constexpr unsigned HIPFatMagic = 0x48495046;
43
45 return M.getDataLayout().getIntPtrType(M.getContext());
46}
47
48// struct __tgt_device_image {
49// void *ImageStart;
50// void *ImageEnd;
51// __tgt_offload_entry *EntriesBegin;
52// __tgt_offload_entry *EntriesEnd;
53// };
54StructType *getDeviceImageTy(Module &M) {
55 LLVMContext &C = M.getContext();
56 StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image");
57 if (!ImageTy)
58 ImageTy =
59 StructType::create("__tgt_device_image", PointerType::getUnqual(C),
62 return ImageTy;
63}
64
65PointerType *getDeviceImagePtrTy(Module &M) {
66 return PointerType::getUnqual(M.getContext());
67}
68
69// struct __tgt_bin_desc {
70// int32_t NumDeviceImages;
71// __tgt_device_image *DeviceImages;
72// __tgt_offload_entry *HostEntriesBegin;
73// __tgt_offload_entry *HostEntriesEnd;
74// };
75StructType *getBinDescTy(Module &M) {
76 LLVMContext &C = M.getContext();
77 StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc");
78 if (!DescTy)
79 DescTy = StructType::create(
80 "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M),
82 return DescTy;
83}
84
85PointerType *getBinDescPtrTy(Module &M) {
86 return PointerType::getUnqual(M.getContext());
87}
88
89/// Creates binary descriptor for the given device images. Binary descriptor
90/// is an object that is passed to the offloading runtime at program startup
91/// and it describes all device images available in the executable or shared
92/// library. It is defined as follows
93///
94/// __attribute__((visibility("hidden")))
95/// extern __tgt_offload_entry *__start_omp_offloading_entries;
96/// __attribute__((visibility("hidden")))
97/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
98///
99/// static const char Image0[] = { <Bufs.front() contents> };
100/// ...
101/// static const char ImageN[] = { <Bufs.back() contents> };
102///
103/// static const __tgt_device_image Images[] = {
104/// {
105/// Image0, /*ImageStart*/
106/// Image0 + sizeof(Image0), /*ImageEnd*/
107/// __start_omp_offloading_entries, /*EntriesBegin*/
108/// __stop_omp_offloading_entries /*EntriesEnd*/
109/// },
110/// ...
111/// {
112/// ImageN, /*ImageStart*/
113/// ImageN + sizeof(ImageN), /*ImageEnd*/
114/// __start_omp_offloading_entries, /*EntriesBegin*/
115/// __stop_omp_offloading_entries /*EntriesEnd*/
116/// }
117/// };
118///
119/// static const __tgt_bin_desc BinDesc = {
120/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
121/// Images, /*DeviceImages*/
122/// __start_omp_offloading_entries, /*HostEntriesBegin*/
123/// __stop_omp_offloading_entries /*HostEntriesEnd*/
124/// };
125///
126/// Global variable that represents BinDesc is returned.
// NOTE(review): every line of this listing is prefixed with its upstream line
// number, and upstream lines 143, 145, 148, 151, 184 and 198 are absent. The
// statements that spanned those lines are incomplete as shown and must be
// restored from the upstream file before this can build.
127GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
128 EntryArrayTy EntryArray, StringRef Suffix,
129 bool Relocatable) {
130 LLVMContext &C = M.getContext();
// The same pair of entry-table bounds is stored in every image record and in
// the descriptor itself.
131 auto [EntriesB, EntriesE] = EntryArray;
132
133 auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
134 Constant *ZeroZero[] = {Zero, Zero};
135
136 // Create initializer for the images array.
137 SmallVector<Constant *, 4u> ImagesInits;
138 ImagesInits.reserve(Bufs.size());
139 for (ArrayRef<char> Buf : Bufs) {
140 // We embed the full offloading entry so the binary utilities can parse it.
141 auto *Data = ConstantDataArray::get(C, Buf);
142 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
144 ".omp_offloading.device_image" + Suffix);
// Relocatable output goes into a separately named section.
146 Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
147 : ".llvm.offloading");
149
150 StringRef Binary(Buf.data(), Buf.size());
// NOTE(review): the line below looks like the tail of an assertion on the
// binary format whose first line (upstream 151) is missing here - confirm.
152 "Invalid binary format");
153
154 // The device image struct contains the pointer to the beginning and end of
155 // the image stored inside of the offload binary. There should only be one
156 // of these for each buffer so we parse it out manually.
// NOTE(review): Header->EntryOffset and the Entry fields are read straight
// from the buffer; no bounds check is visible in this listing.
157 const auto *Header =
158 reinterpret_cast<const object::OffloadBinary::Header *>(
159 Binary.bytes_begin());
160 const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>(
161 Binary.bytes_begin() + Header->EntryOffset);
162
163 auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset);
// Despite its name, `Size` holds the end offset (ImageOffset + ImageSize); it
// indexes the one-past-the-end pointer stored as the image's second field.
164 auto *Size =
165 ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize);
166 Constant *ZeroBegin[] = {Zero, Begin};
167 Constant *ZeroSize[] = {Zero, Size};
168
169 auto *ImageB =
170 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin);
171 auto *ImageE =
172 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
173
174 ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
175 ImageE, EntriesB, EntriesE));
176 }
177
178 // Then create images array.
179 auto *ImagesData = ConstantArray::get(
180 ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits);
181
182 auto *Images =
183 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
185 ".omp_offloading.device_images" + Suffix);
186 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
187
// Pointer to the first element of the images array.
188 auto *ImagesB =
189 ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero);
190
191 // And finally create the binary descriptor object.
192 auto *DescInit = ConstantStruct::get(
193 getBinDescTy(M),
194 ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
195 EntriesB, EntriesE);
196
197 return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
199 ".omp_offloading.descriptor" + Suffix);
200}
201
// Builds a void() function named `.omp_offloading.descriptor_unreg<Suffix>` in
// \p M whose body calls `__tgt_unregister_lib(BinDesc)` and returns. The
// created function is returned to the caller.
// NOTE(review): upstream line 207 (the start of the expression that creates
// `Func`) is absent from this listing, so the `auto *Func =` statement below is
// incomplete as shown.
202Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
203 StringRef Suffix) {
204 LLVMContext &C = M.getContext();
205 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
206 auto *Func =
208 ".omp_offloading.descriptor_unreg" + Suffix, &M);
209 Func->setSection(".text.startup");
210
211 // Get __tgt_unregister_lib function declaration.
212 auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
213 /*isVarArg*/ false);
214 FunctionCallee UnRegFuncC =
215 M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
216
217 // Construct function body
218 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
219 Builder.CreateCall(UnRegFuncC, BinDesc);
220 Builder.CreateRetVoid();
221
222 return Func;
223}
224
// Builds the function `.omp_offloading.descriptor_reg<Suffix>` in \p M. Its
// body calls `__tgt_register_lib(BinDesc)` and then passes the matching
// unregister function to `atexit`. The function is appended to the module's
// global constructors with priority 101.
// NOTE(review): upstream line 229 (the declaration of `Func`) is absent from
// this listing, so `Func` is used below without a visible definition.
225void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
226 StringRef Suffix) {
227 LLVMContext &C = M.getContext();
228 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
230 ".omp_offloading.descriptor_reg" + Suffix, &M);
231 Func->setSection(".text.startup");
232
233 // Get __tgt_register_lib function declaration.
234 auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
235 /*isVarArg*/ false);
236 FunctionCallee RegFuncC =
237 M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
238
// Declared here as i32 atexit(ptr).
239 auto *AtExitTy = FunctionType::get(
240 Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
241 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
242
243 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
244
245 // Construct function body
246 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
247
248 Builder.CreateCall(RegFuncC, BinDesc);
249
250 // Register the destructors with 'atexit'. This is expected by the CUDA
251 // runtime and ensures that we clean up before dynamic objects are destroyed.
252 // This needs to be done after plugin initialization to ensure that it is
253 // called before the plugin runtime is destroyed.
254 Builder.CreateCall(AtExit, UnregFunc);
255 Builder.CreateRetVoid();
256
257 // Add this function to constructors.
258 appendToGlobalCtors(M, Func, /*Priority=*/101);
259}
260
261// struct fatbin_wrapper {
262// int32_t magic;
263// int32_t version;
264// void *image;
265// void *reserved;
266//};
267StructType *getFatbinWrapperTy(Module &M) {
268 LLVMContext &C = M.getContext();
269 StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper");
270 if (!FatbinTy)
271 FatbinTy = StructType::create(
272 "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C),
274 return FatbinTy;
275}
276
277/// Embed the image \p Image into the module \p M so it can be found by the
278/// runtime.
// Returns the internal-linkage `.fatbin_wrapper<Suffix>` global, which is
// placed in the wrapper section chosen below and aligned to 8 bytes.
// NOTE(review): upstream lines 291, 302 and 303 are absent from this listing.
// The `Fatbin` global's constructor call and the `FatbinWrapper` initializer
// list are therefore incomplete as shown.
279GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
280 StringRef Suffix) {
281 LLVMContext &C = M.getContext();
282 llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
283 const llvm::Triple &Triple = M.getTargetTriple();
284
285 // Create the global string containing the fatbinary.
// Section name depends on HIP vs. CUDA and, for CUDA, on whether the target
// is macOS.
286 StringRef FatbinConstantSection =
287 IsHIP ? ".hip_fatbin"
288 : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
289 auto *Data = ConstantDataArray::get(C, Image);
290 auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
292 ".fatbin_image" + Suffix);
293 Fatbin->setSection(FatbinConstantSection);
294
295 // Create the fatbinary wrapper
296 StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
297 : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
298 : ".nvFatBinSegment";
// First two wrapper fields: the magic number selected by IsHIP, then the
// constant 1 in the `version` slot of fatbin_wrapper.
299 Constant *FatbinWrapper[] = {
300 ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
301 ConstantInt::get(Type::getInt32Ty(C), 1),
304
305 Constant *FatbinInitializer =
306 ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper);
307
308 auto *FatbinDesc =
309 new GlobalVariable(M, getFatbinWrapperTy(M),
310 /*isConstant*/ true, GlobalValue::InternalLinkage,
311 FatbinInitializer, ".fatbin_wrapper" + Suffix);
312 FatbinDesc->setSection(FatbinWrapperSection);
313 FatbinDesc->setAlignment(Align(8));
314
315 return FatbinDesc;
316}
317
318/// Create the register globals function. We will iterate all of the offloading
319/// entries stored at the begin / end symbols and register them according to
320/// their type. This creates the following function in IR:
321///
322/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
323/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
324///
325/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
326/// void *, void *, void *, void *, int *);
327/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
328/// int64_t, int32_t, int32_t);
329///
330/// void __cudaRegisterTest(void **fatbinHandle) {
331/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
332/// entry != &__stop_cuda_offloading_entries; ++entry) {
333/// if (entry->Kind != OFK_CUDA)
334/// continue;
335///
336/// if (!entry->Size)
337/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
338/// entry->name, -1, 0, 0, 0, 0, 0);
339/// else
340/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
341/// 0, entry->size, 0, 0);
342/// }
343/// }
// NOTE(review): every line of this listing is prefixed with its upstream line
// number, and upstream lines 356, 365, 367, 374, 376, 384, 386, 393, 395, 403,
// 460, 467, 472, 477, 482, 483, 487, 542 and 547 are absent. The statements
// that spanned those lines are incomplete as shown and must be restored from
// the upstream file before this can build.
// NOTE(review): `Suffix` is not referenced by any line visible here - confirm
// whether that is intentional.
344Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
345 EntryArrayTy EntryArray,
346 StringRef Suffix,
347 bool EmitSurfacesAndTextures) {
348 LLVMContext &C = M.getContext();
349 auto [EntriesB, EntriesE] = EntryArray;
350
351 // Get the __cudaRegisterFunction function declaration.
// All three aliases below name the same address-space-0 pointer type; the
// distinct names only record the intended pointee.
352 PointerType *Int8PtrTy = PointerType::get(C, 0);
353 PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
354 PointerType *Int32PtrTy = PointerType::get(C, 0);
355 auto *RegFuncTy = FunctionType::get(
357 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
358 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
359 /*isVarArg*/ false);
360 FunctionCallee RegFunc = M.getOrInsertFunction(
361 IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
362
363 // Get the __cudaRegisterVar function declaration.
364 auto *RegVarTy = FunctionType::get(
366 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
368 /*isVarArg*/ false);
369 FunctionCallee RegVar = M.getOrInsertFunction(
370 IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
371
372 // Get the __cudaRegisterManagedVar function declaration.
373 FunctionType *RegManagedVarTy =
375 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
377 /*isVarArg=*/false);
378 FunctionCallee RegManagedVar = M.getOrInsertFunction(
379 IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
380 RegManagedVarTy);
381
382 // Get the __cudaRegisterSurface function declaration.
383 FunctionType *RegSurfaceTy =
385 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
387 /*isVarArg=*/false);
388 FunctionCallee RegSurface = M.getOrInsertFunction(
389 IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);
390
391 // Get the __cudaRegisterTexture function declaration.
392 FunctionType *RegTextureTy = FunctionType::get(
394 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
396 /*isVarArg=*/false);
397 FunctionCallee RegTexture = M.getOrInsertFunction(
398 IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);
399
// The generated function takes the fatbinary handle as its only argument.
400 auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
401 /*isVarArg*/ false);
402 auto *RegGlobalsFn =
404 IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
405 RegGlobalsFn->setSection(".text.startup");
406
407 // Create the loop to register all the entries.
408 IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
409 auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
410 auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn);
411 auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
412 auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
413 auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
414 auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
415 auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
416 auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
417 auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
418 auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);
419
// Skip the loop entirely when the entry range is empty.
420 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
421 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
422 Builder.SetInsertPoint(EntryBB);
// PHI for the current entry pointer; its two incoming values are added at
// the bottom of the function.
423 auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
// Load the fields of the current entry. In each GEP the second index selects
// the member of the type returned by offloading::getEntryTy(M).
424 auto *AddrPtr =
425 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
426 {ConstantInt::get(Type::getInt32Ty(C), 0),
427 ConstantInt::get(Type::getInt32Ty(C), 4)});
428 auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
429 auto *AuxAddrPtr =
430 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
431 {ConstantInt::get(Type::getInt32Ty(C), 0),
432 ConstantInt::get(Type::getInt32Ty(C), 8)});
433 auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr");
434 auto *KindPtr =
435 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
436 {ConstantInt::get(Type::getInt32Ty(C), 0),
437 ConstantInt::get(Type::getInt32Ty(C), 2)});
438 auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind");
439 auto *NamePtr =
440 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
441 {ConstantInt::get(Type::getInt32Ty(C), 0),
442 ConstantInt::get(Type::getInt32Ty(C), 5)});
443 auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
444 auto *SizePtr =
445 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
446 {ConstantInt::get(Type::getInt32Ty(C), 0),
447 ConstantInt::get(Type::getInt32Ty(C), 6)});
448 auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size");
449 auto *FlagsPtr =
450 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
451 {ConstantInt::get(Type::getInt32Ty(C), 0),
452 ConstantInt::get(Type::getInt32Ty(C), 3)});
453 auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
454 auto *DataPtr =
455 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
456 {ConstantInt::get(Type::getInt32Ty(C), 0),
457 ConstantInt::get(Type::getInt32Ty(C), 7)});
458 auto *Data = Builder.CreateTrunc(
459 Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"),
// The low three bits of Flags select the entry type that drives the switch
// further down. The local named `Type` hides llvm::Type from here on, but
// `Type::getInt32Ty` still resolves because name lookup in front of `::`
// only considers types and namespaces.
461 auto *Type = Builder.CreateAnd(
462 Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");
463
464 // Extract the flags stored in the bit-field and convert them to C booleans.
// Each mask is followed by a logical shift right (by 3, 4 and 5) that moves
// the masked bit down to bit 0.
465 auto *ExternBit = Builder.CreateAnd(
466 Flags, ConstantInt::get(Type::getInt32Ty(C),
468 auto *Extern = Builder.CreateLShr(
469 ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
470 auto *ConstantBit = Builder.CreateAnd(
471 Flags, ConstantInt::get(Type::getInt32Ty(C),
473 auto *Const = Builder.CreateLShr(
474 ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
475 auto *NormalizedBit = Builder.CreateAnd(
476 Flags, ConstantInt::get(Type::getInt32Ty(C),
478 auto *Normalized = Builder.CreateLShr(
479 NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
// Entries whose kind does not match jump straight to the loop increment.
480 auto *KindCond = Builder.CreateICmpEQ(
481 Kind, ConstantInt::get(Type::getInt16Ty(C),
484 Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
485 Builder.SetInsertPoint(IfKindBB);
// NOTE(review): the operands of this comparison are on the missing upstream
// line 487; the pseudo-code above this function suggests a zero-size test.
486 auto *FnCond = Builder.CreateICmpEQ(
488 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
489
490 // Create kernel registration code.
491 Builder.SetInsertPoint(IfThenBB);
492 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
493 ConstantInt::get(Type::getInt32Ty(C), -1),
494 ConstantPointerNull::get(Int8PtrTy),
495 ConstantPointerNull::get(Int8PtrTy),
496 ConstantPointerNull::get(Int8PtrTy),
497 ConstantPointerNull::get(Int8PtrTy),
498 ConstantPointerNull::get(Int32PtrTy)});
499 Builder.CreateBr(IfEndBB);
500 Builder.SetInsertPoint(IfElseBB);
501
// Entry types without a case below fall through to the loop increment.
502 auto *Switch = Builder.CreateSwitch(Type, IfEndBB);
503 // Create global variable registration code.
504 Builder.SetInsertPoint(SwGlobalBB);
505 Builder.CreateCall(RegVar,
506 {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
507 Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
508 Builder.CreateBr(IfEndBB);
509 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
510 SwGlobalBB);
511
512 // Create managed variable registration code.
513 Builder.SetInsertPoint(SwManagedBB);
514 Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
515 Name, Size, Data});
516 Builder.CreateBr(IfEndBB);
517 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
518 SwManagedBB);
519 // Create surface variable registration code.
// The surface and texture blocks are always created and wired into the
// switch; only the runtime call inside them depends on
// EmitSurfacesAndTextures.
520 Builder.SetInsertPoint(SwSurfaceBB);
521 if (EmitSurfacesAndTextures)
522 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
523 Data, Extern});
524 Builder.CreateBr(IfEndBB);
525 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
526 SwSurfaceBB);
527
528 // Create texture variable registration code.
529 Builder.SetInsertPoint(SwTextureBB);
530 if (EmitSurfacesAndTextures)
531 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
532 Data, Normalized, Extern});
533 Builder.CreateBr(IfEndBB);
534 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
535 SwTextureBB);
536
// Loop increment: step to the next entry, then leave the loop when the
// comparison against the EntriesE-based expression holds.
537 Builder.SetInsertPoint(IfEndBB);
538 auto *NewEntry = Builder.CreateInBoundsGEP(
539 offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
540 auto *Cmp = Builder.CreateICmpEQ(
541 NewEntry,
543 ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
544 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
545 ConstantInt::get(getSizeTTy(M), 0)})));
// The PHI starts from the EntriesB-based expression on entry and takes
// NewEntry on the back edge.
546 Entry->addIncoming(
548 ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
549 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
550 ConstantInt::get(getSizeTTy(M), 0)})),
551 &RegGlobalsFn->getEntryBlock());
552 Entry->addIncoming(NewEntry, IfEndBB);
553 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
554 Builder.SetInsertPoint(ExitBB);
555 Builder.CreateRetVoid();
556
557 return RegGlobalsFn;
558}
559
560// Create the constructor and destructor to register the fatbinary with the CUDA
561// runtime.
// NOTE(review): every line of this listing is prefixed with its upstream line
// number, and upstream lines 569, 575, 603 and 610 are absent. The two
// Function::Create calls, the binary-handle global and the register call are
// therefore incomplete as shown.
562void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
563 bool IsHIP, EntryArrayTy EntryArray,
564 StringRef Suffix,
565 bool EmitSurfacesAndTextures) {
566 LLVMContext &C = M.getContext();
567 auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
568 auto *CtorFunc = Function::Create(
570 (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
571 CtorFunc->setSection(".text.startup");
572
573 auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
574 auto *DtorFunc = Function::Create(
576 (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
577 DtorFunc->setSection(".text.startup");
578
579 auto *PtrTy = PointerType::getUnqual(C);
580
581 // Get the __cudaRegisterFatBinary function declaration.
582 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false);
583 FunctionCallee RegFatbin = M.getOrInsertFunction(
584 IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
585 // Get the __cudaRegisterFatBinaryEnd function declaration.
// Only a CUDA name is declared here; the call is emitted on the non-HIP
// path only (see below).
586 auto *RegFatEndTy =
587 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
588 FunctionCallee RegFatbinEnd =
589 M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy);
590 // Get the __cudaUnregisterFatBinary function declaration.
591 auto *UnregFatTy =
592 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
593 FunctionCallee UnregFatbin = M.getOrInsertFunction(
594 IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
595 UnregFatTy);
596
597 auto *AtExitTy =
598 FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false);
599 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
600
// Non-constant internal global that carries the handle from the constructor
// to the destructor.
601 auto *BinaryHandleGlobal = new llvm::GlobalVariable(
602 M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
604 (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
605
606 // Create the constructor to register this image with the runtime.
607 IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
608 CallInst *Handle = CtorBuilder.CreateCall(
609 RegFatbin,
// Save the returned handle, aligned to the pointer size, for the destructor.
611 CtorBuilder.CreateAlignedStore(
612 Handle, BinaryHandleGlobal,
613 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
// Generate the globals-registration function and call it with the handle.
614 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
615 Suffix,
616 EmitSurfacesAndTextures),
617 Handle);
618 if (!IsHIP)
619 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
620 CtorBuilder.CreateCall(AtExit, DtorFunc);
621 CtorBuilder.CreateRetVoid();
622
623 // Create the destructor to unregister the image with the runtime. We cannot
624 // use a standard global destructor after CUDA 9.2 so this must be called by
625 // `atexit()` instead.
626 IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc));
627 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
628 PtrTy, BinaryHandleGlobal,
629 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
630 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
631 DtorBuilder.CreateRetVoid();
632
633 // Add this function to constructors.
634 appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
635}
636
637/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
638struct SYCLWrapper {
639 Module &M;
640 LLVMContext &C;
641 SYCLJITOptions Options;
642
643 StructType *EntryTy = nullptr;
644 StructType *SyclDeviceImageTy = nullptr;
645 StructType *SyclBinDescTy = nullptr;
646
647 SYCLWrapper(Module &M, const SYCLJITOptions &Options)
648 : M(M), C(M.getContext()), Options(Options) {
649 EntryTy = offloading::getEntryTy(M);
650 SyclDeviceImageTy = getSyclDeviceImageTy();
651 SyclBinDescTy = getSyclBinDescTy();
652 }
653
654 IntegerType *getSizeTTy() {
655 switch (M.getDataLayout().getPointerSize()) {
656 case 4:
657 return Type::getInt32Ty(C);
658 case 8:
659 return Type::getInt64Ty(C);
660 }
661 llvm_unreachable("unsupported pointer type size");
662 }
663
664 SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
665 IntegerType *SizeTTy = getSizeTTy();
666 return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
667 ConstantInt::get(SizeTTy, Second)};
668 }
669
670 /// Note: Properties aren't supported and the support is going
671 /// to be added later.
672 /// Creates a structure corresponding to:
673 /// SYCL specific image descriptor type.
674 /// \code
675 /// struct __sycl.tgt_device_image {
676 /// // version of this structure - for backward compatibility;
677 /// // all modifications which change order/type/offsets of existing fields
678 /// // should increment the version.
679 /// uint16_t Version;
680 /// // the kind of offload model the image employs.
681 /// uint8_t OffloadKind;
682 /// // format of the image data - SPIRV, LLVMIR bitcode, etc
683 /// uint8_t Format;
684 /// // null-terminated string representation of the device's target
685 /// // architecture
686 /// const char *Arch;
687 /// // a null-terminated string; target- and compiler-specific options
688 /// // which are suggested to use to "compile" program at runtime
689 /// const char *CompileOptions;
690 /// // a null-terminated string; target- and compiler-specific options
691 /// // which are suggested to use to "link" program at runtime
692 /// const char *LinkOptions;
693 /// // Pointer to the device binary image start
694 /// void *ImageStart;
695 /// // Pointer to the device binary image end
696 /// void *ImageEnd;
697 /// // the entry table
698 /// __tgt_offload_entry *EntriesBegin;
699 /// __tgt_offload_entry *EntriesEnd;
700 /// const char *PropertiesBegin;
701 /// const char *PropertiesEnd;
702 /// };
703 /// \endcode
704 StructType *getSyclDeviceImageTy() {
705 return StructType::create(
706 {
707 Type::getInt16Ty(C), // Version
708 Type::getInt8Ty(C), // OffloadKind
709 Type::getInt8Ty(C), // Format
710 PointerType::getUnqual(C), // Arch
711 PointerType::getUnqual(C), // CompileOptions
712 PointerType::getUnqual(C), // LinkOptions
713 PointerType::getUnqual(C), // ImageStart
714 PointerType::getUnqual(C), // ImageEnd
715 PointerType::getUnqual(C), // EntriesBegin
716 PointerType::getUnqual(C), // EntriesEnd
717 PointerType::getUnqual(C), // PropertiesBegin
718 PointerType::getUnqual(C) // PropertiesEnd
719 },
720 "__sycl.tgt_device_image");
721 }
722
723 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
724 /// to:
725 ///
726 /// \code
727 /// struct __sycl.tgt_bin_desc {
728 /// // version of this structure - for backward compatibility;
729 /// // all modifications which change order/type/offsets of existing fields
730 /// // should increment the version.
731 /// uint16_t Version;
732 /// uint16_t NumDeviceImages;
733 /// __sycl.tgt_device_image *DeviceImages;
734 /// // the offload entry table
735 /// __tgt_offload_entry *HostEntriesBegin;
736 /// __tgt_offload_entry *HostEntriesEnd;
737 /// };
738 /// \endcode
739 StructType *getSyclBinDescTy() {
740 return StructType::create(
741 {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
742 PointerType::getUnqual(C), PointerType::getUnqual(C)},
743 "__sycl.tgt_bin_desc");
744 }
745
746 /// Adds a global readonly variable that is initialized by given
747 /// \p Initializer to the module.
748 GlobalVariable *addGlobalArrayVariable(const Twine &Name,
749 ArrayRef<char> Initializer,
750 const Twine &Section = "") {
751 auto *Arr = ConstantDataArray::get(M.getContext(), Initializer);
752 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
753 GlobalVariable::InternalLinkage, Arr, Name);
754 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
755
756 SmallVector<char, 32> NameBuf;
757 auto SectionName = Section.toStringRef(NameBuf);
758 if (!SectionName.empty())
759 Var->setSection(SectionName);
760 return Var;
761 }
762
763 /// Adds given \p Buf as a global variable into the module.
764 /// \returns Pair of pointers that point at the beginning and the end of the
765 /// variable.
766 std::pair<Constant *, Constant *>
767 addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
768 const Twine &Section = "") {
769 auto *Var = addGlobalArrayVariable(Name, Buf, Section);
770 auto *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
771 getSizetConstPair(0, 0));
772 auto *ImageE = ConstantExpr::getGetElementPtr(
773 Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
774 return std::make_pair(ImageB, ImageE);
775 }
776
777 /// Adds given \p Data as constant byte array in the module.
778 /// \returns Constant pointer to the added data. The pointer type does not
779 /// carry size information.
780 Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
781 auto *Var = addGlobalArrayVariable(Name, Data);
782 auto *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
783 getSizetConstPair(0, 0));
784 return DataPtr;
785 }
786
787 /// Creates a global variable of const char* type and creates an
788 /// initializer that initializes it with \p Str.
789 ///
790 /// \returns Link-time constant pointer (constant expr) to that
791 /// variable.
792 Constant *addStringToModule(StringRef Str, const Twine &Name) {
793 auto *Arr = ConstantDataArray::getString(C, Str);
794 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
795 GlobalVariable::InternalLinkage, Arr, Name);
796 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
797 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
798 Constant *ZeroZero[] = {Zero, Zero};
799 return ConstantExpr::getGetElementPtr(Var->getValueType(), Var, ZeroZero);
800 }
801
802 /// Each image contains its own set of symbols, which may contain different
803 /// symbols than other images. This function constructs an array of
804 /// symbol entries for a particular image.
805 ///
806 /// \returns Pointers to the beginning and end of the array.
807 std::pair<Constant *, Constant *>
808 initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
809 SmallVector<Constant *> EntriesInits;
810 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
811 Entries, /*BufferName*/ "", /*RequiresNullTerminator*/ false);
812 for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
813 GlobalVariable *GV =
814 emitOffloadingEntry(M, /*Kind*/ OffloadKind::OFK_SYCL,
815 Constant::getNullValue(PointerType::getUnqual(C)),
816 /*Name*/ *LI, /*Size*/ 0,
817 /*Flags*/ 0, /*Data*/ 0);
818 EntriesInits.push_back(GV->getInitializer());
819 }
820
821 auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()),
822 EntriesInits);
823 auto *EntriesGV = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
824 GlobalVariable::InternalLinkage, Arr,
825 OffloadKindTag + "entries_arr");
826
827 auto *EntriesB = ConstantExpr::getGetElementPtr(
828 EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
829 auto *EntriesE = ConstantExpr::getGetElementPtr(
830 EntriesGV->getValueType(), EntriesGV,
831 getSizetConstPair(0, EntriesInits.size()));
832 return std::make_pair(EntriesB, EntriesE);
833 }
834
835 Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
836 StringRef OffloadKindTag) {
837 // Note: Intel DPC++ compiler had 2 versions of this structure
838 // and clang++ has a third different structure. To avoid ABI incompatibility
839 // between generated device images the Version here starts from 3.
840 constexpr uint16_t DeviceImageStructVersion = 3;
842 ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
843 Constant *OffloadKindConstant = ConstantInt::get(
844 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
845 Constant *ImageKindConstant = ConstantInt::get(
846 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
847 StringRef Triple = OB.getString("triple");
848 Constant *TripleConstant =
849 addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
850 Constant *CompileOptions =
851 addStringToModule(Options.CompileOptions,
852 Twine(OffloadKindTag) + "opts.compile." + ImageID);
853 Constant *LinkOptions = addStringToModule(
854 Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
855
856 // Note: NULL for now.
857 std::pair<Constant *, Constant *> PropertiesConstants = {
858 Constant::getNullValue(PointerType::getUnqual(C)),
859 Constant::getNullValue(PointerType::getUnqual(C))};
860
861 StringRef RawImage = OB.getImage();
862 std::pair<Constant *, Constant *> Binary = addArrayToModule(
863 ArrayRef<char>(RawImage.begin(), RawImage.end()),
864 Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
865
866 // For SYCL images offload entries are defined here per image.
867 std::pair<Constant *, Constant *> ImageEntriesPtrs =
868 initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
869 Constant *WrappedBinary = ConstantStruct::get(
870 SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
871 TripleConstant, CompileOptions, LinkOptions, Binary.first,
872 Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
873 PropertiesConstants.first, PropertiesConstants.second);
874
875 return WrappedBinary;
876 }
877
878 GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
879 StringRef OffloadKindTag) {
880 auto *ImagesData = ConstantArray::get(
881 ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
882 auto *ImagesGV =
883 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
885 Twine(OffloadKindTag) + "device_images");
886 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
887
888 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
889 Constant *ZeroZero[] = {Zero, Zero};
890 auto *ImagesB = ConstantExpr::getGetElementPtr(ImagesGV->getValueType(),
891 ImagesGV, ZeroZero);
892
893 Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
894 Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
895 static constexpr uint16_t BinDescStructVersion = 1;
896 auto *DescInit = ConstantStruct::get(
897 SyclBinDescTy,
898 ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
899 ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesB,
900 EntriesB, EntriesE);
901
902 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
904 Twine(OffloadKindTag) + "descriptor");
905 }
906
907 /// Creates binary descriptor for the given device images. Binary descriptor
908 /// is an object that is passed to the offloading runtime at program startup
909 /// and it describes all device images available in the executable or shared
910 /// library. It is defined as follows:
911 ///
912 /// \code
913 /// __attribute__((visibility("hidden")))
914 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
915 /// ...
916 /// __attribute__((visibility("hidden")))
917 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
918 ///
919 /// __attribute__((visibility("hidden")))
920 /// extern const char *CompileOptions = "...";
921 /// ...
922 /// __attribute__((visibility("hidden")))
923 /// extern const char *LinkOptions = "...";
924 /// ...
925 ///
926 /// static const char Image0[] = { ... };
927 /// ...
928 /// static const char ImageN[] = { ... };
929 ///
930 /// static const __sycl.tgt_device_image Images[] = {
931 /// {
932 /// Version, // Version
933 /// OffloadKind, // OffloadKind
934 /// Format, // Format of the image.
935 // TripleString, // Arch
936 /// CompileOptions, // CompileOptions
937 /// LinkOptions, // LinkOptions
938 /// Image0, // ImageStart
939 /// Image0 + IMAGE0_SIZE, // ImageEnd
940 /// __sycl_offload_entries_arr0, // EntriesBegin
941 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
942 /// NULL, // PropertiesBegin
943 /// NULL, // PropertiesEnd
944 /// },
945 /// ...
946 /// };
947 ///
948 /// static const __sycl.tgt_bin_desc FatbinDesc = {
949 /// Version, //Version
950 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
951 /// Images, //DeviceImages
952 /// NULL, //HostEntriesBegin
953 /// NULL //HostEntriesEnd
954 /// };
955 /// \endcode
956 ///
957 /// \returns Global variable that represents FatbinDesc.
958 GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
959 StringRef OffloadKindTag = ".sycl_offloading.";
960 SmallVector<Constant *> WrappedImages;
961 WrappedImages.reserve(OffloadFiles.size());
962 for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
963 WrappedImages.push_back(
964 wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
965
966 return combineWrappedImages(WrappedImages, OffloadKindTag);
967 }
968
969 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
970 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
972 Twine("sycl") + ".descriptor_reg", &M);
973 Func->setSection(".text.startup");
974
975 // Get RegFuncName function declaration.
976 auto *RegFuncTy =
977 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
978 /*isVarArg=*/false);
979 FunctionCallee RegFuncC =
980 M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
981
982 // Construct function body
983 IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
984 Builder.CreateCall(RegFuncC, FatbinDesc);
985 Builder.CreateRetVoid();
986
987 // Add this function to constructors.
988 appendToGlobalCtors(M, Func, /*Priority*/ 1);
989 }
990
991 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
992 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
994 "sycl.descriptor_unreg", &M);
995 Func->setSection(".text.startup");
996
997 // Get UnregFuncName function declaration.
998 auto *UnRegFuncTy =
999 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
1000 /*isVarArg=*/false);
1001 FunctionCallee UnRegFuncC =
1002 M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
1003
1004 // Construct function body
1005 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
1006 Builder.CreateCall(UnRegFuncC, FatbinDesc);
1007 Builder.CreateRetVoid();
1008
1009 // Add this function to global destructors.
1010 appendToGlobalDtors(M, Func, /*Priority*/ 1);
1011 }
1012}; // end of SYCLWrapper
1013
1014} // namespace
1015
1017 EntryArrayTy EntryArray,
1018 llvm::StringRef Suffix, bool Relocatable) {
1020 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
1021 if (!Desc)
1023 "No binary descriptors created.");
1024 createRegisterFunction(M, Desc, Suffix);
1025 return Error::success();
1026}
1027
1029 EntryArrayTy EntryArray,
1030 llvm::StringRef Suffix,
1031 bool EmitSurfacesAndTextures) {
1032 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix);
1033 if (!Desc)
1035 "No fatbin section created.");
1036
1037 createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix,
1038 EmitSurfacesAndTextures);
1039 return Error::success();
1040}
1041
1043 EntryArrayTy EntryArray, llvm::StringRef Suffix,
1044 bool EmitSurfacesAndTextures) {
1045 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix);
1046 if (!Desc)
1048 "No fatbin section created.");
1049
1050 createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix,
1051 EmitSurfacesAndTextures);
1052 return Error::success();
1053}
1054
1057 SYCLWrapper W(M, Options);
1058 MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
1059 /*Identifier*/ "");
1060 SmallVector<OffloadFile> OffloadFiles;
1061 if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
1062 return E;
1063
1064 GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
1065 if (!Desc)
1067 "No binary descriptors created.");
1068
1069 W.createRegisterFatbinFunction(Desc);
1070 W.createUnregisterFunction(Desc);
1071 return Error::success();
1072}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file defines the SmallVector class.
@ ConstantBit
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
iterator begin() const
Definition StringRef.h:112
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
static LLVM_ABI StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
Definition Type.cpp:739
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition Type.cpp:620
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isMacOSX() const
Is this a Mac OS X triple.
Definition Triple.h:566
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:296
static uint64_t getAlignment()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char SectionName[]
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Definition CoroShape.h:31
LLVM_ABI Error extractOffloadBinaries(MemoryBufferRef Buffer, SmallVectorImpl< OffloadFile > &Binaries)
Extracts embedded device offloading code from a memory Buffer to a list of Binaries.
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
Definition Utility.cpp:86
LLVM_ABI StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
Definition Utility.cpp:26
LLVM_ABI llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::ArrayRef< char > Buffer, SYCLJITOptions Options=SYCLJITOptions())
Wraps OffloadBinaries in the given Buffers into the module M as global symbols and registers the imag...
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
Definition Utility.h:58
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
Definition Utility.h:60
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
Definition Utility.h:66
@ OffloadGlobalEntry
Mark the entry as a global entry.
Definition Utility.h:54
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
Definition Utility.h:56
@ OffloadGlobalExtern
Mark the entry as being extern.
Definition Utility.h:62
@ OffloadGlobalConstant
Mark the entry as being constant.
Definition Utility.h:64
LLVM_ABI llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the O...
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
LLVM_ABI llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the H...
LLVM_ABI llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the...
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:157
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ offload_binary
LLVM offload object file.
Definition Magic.h:58