28constexpr unsigned CudaFatMagic = 0x466243b1;
29constexpr unsigned HIPFatMagic = 0x48495046;
32 return M.getDataLayout().getIntPtrType(
M.getContext());
47 PointerType::getUnqual(
C), PointerType::getUnqual(
C),
48 PointerType::getUnqual(
C));
53 return PointerType::getUnqual(getDeviceImageTy(M));
68 PointerType::getUnqual(
C), PointerType::getUnqual(
C));
73 return PointerType::getUnqual(getBinDescTy(M));
118 auto [EntriesB, EntriesE] = EntryArray;
125 ImagesInits.
reserve(Bufs.size());
130 GlobalVariable::InternalLinkage, Data,
131 ".omp_offloading.device_image" + Suffix);
132 Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
133 Image->setSection(Relocatable ?
".llvm.offloading.relocatable"
134 :
".llvm.offloading");
139 "Invalid binary format");
148 Binary.bytes_begin() + Header->EntryOffset);
162 ImageE, EntriesB, EntriesE));
167 ArrayType::get(getDeviceImageTy(M), ImagesInits.
size()), ImagesInits);
172 ".omp_offloading.device_images" + Suffix);
173 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
186 ".omp_offloading.descriptor" + Suffix);
195 ".omp_offloading.descriptor_unreg" + Suffix, &M);
196 Func->setSection(
".text.startup");
199 auto *UnRegFuncTy = FunctionType::get(
Type::getVoidTy(
C), getBinDescPtrTy(M),
202 M.getOrInsertFunction(
"__tgt_unregister_lib", UnRegFuncTy);
206 Builder.CreateCall(UnRegFuncC, BinDesc);
207 Builder.CreateRetVoid();
217 ".omp_offloading.descriptor_reg" + Suffix, &M);
218 Func->setSection(
".text.startup");
224 M.getOrInsertFunction(
"__tgt_register_lib", RegFuncTy);
226 auto *AtExitTy = FunctionType::get(
230 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
235 Builder.CreateCall(RegFuncC, BinDesc);
241 Builder.CreateCall(AtExit, UnregFunc);
242 Builder.CreateRetVoid();
260 PointerType::getUnqual(
C), PointerType::getUnqual(
C));
274 IsHIP ?
".hip_fatbin"
278 GlobalVariable::InternalLinkage, Data,
279 ".fatbin_image" + Suffix);
280 Fatbin->setSection(FatbinConstantSection);
283 StringRef FatbinWrapperSection = IsHIP ?
".hipFatBinSegment"
285 :
".nvFatBinSegment";
298 FatbinInitializer,
".fatbin_wrapper" + Suffix);
299 FatbinDesc->setSection(FatbinWrapperSection);
300 FatbinDesc->setAlignment(
Align(8));
331 bool EmitSurfacesAndTextures) {
333 auto [EntriesB, EntriesE] = EntryArray;
339 auto *RegFuncTy = FunctionType::get(
342 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
345 IsHIP ?
"__hipRegisterFunction" :
"__cudaRegisterFunction", RegFuncTy);
348 auto *RegVarTy = FunctionType::get(
354 IsHIP ?
"__hipRegisterVar" :
"__cudaRegisterVar", RegVarTy);
359 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
363 IsHIP ?
"__hipRegisterSurface" :
"__cudaRegisterSurface", RegSurfaceTy);
372 IsHIP ?
"__hipRegisterTexture" :
"__cudaRegisterTexture", RegTextureTy);
378 IsHIP ?
".hip.globals_reg" :
".cuda.globals_reg", &M);
379 RegGlobalsFn->setSection(
".text.startup");
393 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
394 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
395 Builder.SetInsertPoint(EntryBB);
396 auto *
Entry = Builder.CreatePHI(PointerType::getUnqual(
C), 2,
"entry");
401 auto *
Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr,
"addr");
406 auto *
Name = Builder.CreateLoad(Int8PtrTy, NamePtr,
"name");
422 auto *
Kind = Builder.CreateAnd(
426 auto *ExternBit = Builder.CreateAnd(
429 auto *
Extern = Builder.CreateLShr(
434 auto *
Const = Builder.CreateLShr(
436 auto *NormalizedBit = Builder.CreateAnd(
439 auto *Normalized = Builder.CreateLShr(
442 Builder.CreateICmpEQ(
Size, ConstantInt::getNullValue(
getSizeTTy(M)));
443 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
446 Builder.SetInsertPoint(IfThenBB);
447 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
454 Builder.CreateBr(IfEndBB);
455 Builder.SetInsertPoint(IfElseBB);
457 auto *
Switch = Builder.CreateSwitch(Kind, IfEndBB);
459 Builder.SetInsertPoint(SwGlobalBB);
460 Builder.CreateCall(RegVar,
463 Builder.CreateBr(IfEndBB);
468 Builder.SetInsertPoint(SwManagedBB);
469 Builder.CreateBr(IfEndBB);
473 Builder.SetInsertPoint(SwSurfaceBB);
474 if (EmitSurfacesAndTextures)
475 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
477 Builder.CreateBr(IfEndBB);
482 Builder.SetInsertPoint(SwTextureBB);
483 if (EmitSurfacesAndTextures)
484 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
485 Data, Normalized,
Extern});
486 Builder.CreateBr(IfEndBB);
490 Builder.SetInsertPoint(IfEndBB);
491 auto *NewEntry = Builder.CreateInBoundsGEP(
493 auto *
Cmp = Builder.CreateICmpEQ(
504 &RegGlobalsFn->getEntryBlock());
505 Entry->addIncoming(NewEntry, IfEndBB);
506 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
507 Builder.SetInsertPoint(ExitBB);
508 Builder.CreateRetVoid();
518 bool EmitSurfacesAndTextures) {
523 (IsHIP ?
".hip.fatbin_reg" :
".cuda.fatbin_reg") + Suffix, &M);
524 CtorFunc->setSection(
".text.startup");
529 (IsHIP ?
".hip.fatbin_unreg" :
".cuda.fatbin_unreg") + Suffix, &M);
530 DtorFunc->setSection(
".text.startup");
532 auto *PtrTy = PointerType::getUnqual(
C);
535 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy,
false);
537 IsHIP ?
"__hipRegisterFatBinary" :
"__cudaRegisterFatBinary", RegFatTy);
542 M.getOrInsertFunction(
"__cudaRegisterFatBinaryEnd", RegFatEndTy);
547 IsHIP ?
"__hipUnregisterFatBinary" :
"__cudaUnregisterFatBinary",
557 (IsHIP ?
".hip.binary_handle" :
".cuda.binary_handle") + Suffix);
561 CallInst *Handle = CtorBuilder.CreateCall(
564 CtorBuilder.CreateAlignedStore(
565 Handle, BinaryHandleGlobal,
566 Align(
M.getDataLayout().getPointerTypeSize(PtrTy)));
567 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
569 EmitSurfacesAndTextures),
572 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
573 CtorBuilder.CreateCall(AtExit, DtorFunc);
574 CtorBuilder.CreateRetVoid();
580 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
581 PtrTy, BinaryHandleGlobal,
582 Align(
M.getDataLayout().getPointerTypeSize(PtrTy)));
583 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
584 DtorBuilder.CreateRetVoid();
595 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
598 "No binary descriptors created.");
599 createRegisterFunction(M,
Desc, Suffix);
606 bool EmitSurfacesAndTextures) {
610 "No fatbin section created.");
612 createRegisterFatbinFunction(M,
Desc,
false, EntryArray, Suffix,
613 EmitSurfacesAndTextures);
619 bool EmitSurfacesAndTextures) {
623 "No fatbin section created.");
625 createRegisterFatbinFunction(M,
Desc,
true, EntryArray, Suffix,
626 EmitSurfacesAndTextures);
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching the ArrayRef passed in.
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single entity.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
@ InternalLinkage
Rename collisions when linking (static functions).
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
A Module instance is used to store all the information related to an LLVM module.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Class to represent struct types.
static StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Triple - Helper class for working with autoconf configuration names.
bool isMacOSX() const
Is this a Mac OS X triple.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static uint64_t getAlignment()
@ C
The default llvm calling convention, compatible with C.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared between all suspend points.
StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered with the offloading runtime.
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
@ OffloadGlobalEntry
Mark the entry as a global entry.
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
@ OffloadGlobalExtern
Mark the entry as being extern.
@ OffloadGlobalConstant
Mark the entry as being constant.
llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the OpenMP Offloading runtime libomptarget.
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the HIP runtime.
llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the CUDA runtime.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
@ offload_binary
LLVM offload object file.