// Magic numbers for the CUDA and HIP fat binary formats.
// NOTE(review): the use sites are not visible in this excerpt; presumably these
// are stored in the fatbin wrapper descriptor selected by the IsHIP flag —
// confirm against the full file.
28constexpr unsigned CudaFatMagic = 0x466243b1;
// 0x48495046 is the ASCII byte sequence 'H', 'I', 'P', 'F'.
29constexpr unsigned HIPFatMagic = 0x48495046;
32 return M.getDataLayout().getIntPtrType(
M.getContext());
47 PointerType::getUnqual(
C), PointerType::getUnqual(
C),
48 PointerType::getUnqual(
C));
53 return PointerType::getUnqual(getDeviceImageTy(M));
68 PointerType::getUnqual(
C), PointerType::getUnqual(
C));
73 return PointerType::getUnqual(getBinDescTy(M));
118 auto [EntriesB, EntriesE] = EntryArray;
125 ImagesInits.
reserve(Bufs.size());
130 GlobalVariable::InternalLinkage, Data,
131 ".omp_offloading.device_image" + Suffix);
132 Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
133 Image->setSection(Relocatable ?
".llvm.offloading.relocatable"
134 :
".llvm.offloading");
139 "Invalid binary format");
148 Binary.bytes_begin() + Header->EntryOffset);
150 auto *Begin = ConstantInt::get(
getSizeTTy(M), Entry->ImageOffset);
152 ConstantInt::get(
getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize);
162 ImageE, EntriesB, EntriesE));
167 ArrayType::get(getDeviceImageTy(M), ImagesInits.
size()), ImagesInits);
172 ".omp_offloading.device_images" + Suffix);
173 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
186 ".omp_offloading.descriptor" + Suffix);
195 ".omp_offloading.descriptor_unreg" + Suffix, &M);
196 Func->setSection(
".text.startup");
199 auto *UnRegFuncTy = FunctionType::get(
Type::getVoidTy(
C), getBinDescPtrTy(M),
202 M.getOrInsertFunction(
"__tgt_unregister_lib", UnRegFuncTy);
206 Builder.CreateCall(UnRegFuncC, BinDesc);
207 Builder.CreateRetVoid();
217 ".omp_offloading.descriptor_reg" + Suffix, &M);
218 Func->setSection(
".text.startup");
224 M.getOrInsertFunction(
"__tgt_register_lib", RegFuncTy);
226 auto *AtExitTy = FunctionType::get(
230 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
238 Builder.CreateCall(AtExit, UnregFunc);
240 Builder.CreateCall(RegFuncC, BinDesc);
241 Builder.CreateRetVoid();
259 PointerType::getUnqual(
C), PointerType::getUnqual(
C));
273 IsHIP ?
".hip_fatbin"
277 GlobalVariable::InternalLinkage, Data,
278 ".fatbin_image" + Suffix);
279 Fatbin->setSection(FatbinConstantSection);
282 StringRef FatbinWrapperSection = IsHIP ?
".hipFatBinSegment"
284 :
".nvFatBinSegment";
297 FatbinInitializer,
".fatbin_wrapper" + Suffix);
298 FatbinDesc->setSection(FatbinWrapperSection);
299 FatbinDesc->setAlignment(
Align(8));
330 bool EmitSurfacesAndTextures) {
332 auto [EntriesB, EntriesE] = EntryArray;
338 auto *RegFuncTy = FunctionType::get(
341 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
344 IsHIP ?
"__hipRegisterFunction" :
"__cudaRegisterFunction", RegFuncTy);
347 auto *RegVarTy = FunctionType::get(
353 IsHIP ?
"__hipRegisterVar" :
"__cudaRegisterVar", RegVarTy);
358 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
362 IsHIP ?
"__hipRegisterSurface" :
"__cudaRegisterSurface", RegSurfaceTy);
371 IsHIP ?
"__hipRegisterTexture" :
"__cudaRegisterTexture", RegTextureTy);
377 IsHIP ?
".hip.globals_reg" :
".cuda.globals_reg", &M);
378 RegGlobalsFn->setSection(
".text.startup");
392 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
393 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
394 Builder.SetInsertPoint(EntryBB);
395 auto *Entry = Builder.CreatePHI(PointerType::getUnqual(
C), 2,
"entry");
400 auto *
Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr,
"addr");
405 auto *
Name = Builder.CreateLoad(Int8PtrTy, NamePtr,
"name");
421 auto *
Kind = Builder.CreateAnd(
425 auto *ExternBit = Builder.CreateAnd(
428 auto *
Extern = Builder.CreateLShr(
433 auto *
Const = Builder.CreateLShr(
435 auto *NormalizedBit = Builder.CreateAnd(
438 auto *Normalized = Builder.CreateLShr(
441 Builder.CreateICmpEQ(
Size, ConstantInt::getNullValue(
getSizeTTy(M)));
442 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
445 Builder.SetInsertPoint(IfThenBB);
446 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
453 Builder.CreateBr(IfEndBB);
454 Builder.SetInsertPoint(IfElseBB);
456 auto *
Switch = Builder.CreateSwitch(Kind, IfEndBB);
458 Builder.SetInsertPoint(SwGlobalBB);
459 Builder.CreateCall(RegVar,
462 Builder.CreateBr(IfEndBB);
467 Builder.SetInsertPoint(SwManagedBB);
468 Builder.CreateBr(IfEndBB);
472 Builder.SetInsertPoint(SwSurfaceBB);
473 if (EmitSurfacesAndTextures)
474 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
476 Builder.CreateBr(IfEndBB);
481 Builder.SetInsertPoint(SwTextureBB);
482 if (EmitSurfacesAndTextures)
483 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(),
Addr,
Name,
Name,
484 Data, Normalized,
Extern});
485 Builder.CreateBr(IfEndBB);
489 Builder.SetInsertPoint(IfEndBB);
490 auto *NewEntry = Builder.CreateInBoundsGEP(
492 auto *
Cmp = Builder.CreateICmpEQ(
503 &RegGlobalsFn->getEntryBlock());
504 Entry->addIncoming(NewEntry, IfEndBB);
505 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
506 Builder.SetInsertPoint(ExitBB);
507 Builder.CreateRetVoid();
517 bool EmitSurfacesAndTextures) {
522 (IsHIP ?
".hip.fatbin_reg" :
".cuda.fatbin_reg") + Suffix, &M);
523 CtorFunc->setSection(
".text.startup");
528 (IsHIP ?
".hip.fatbin_unreg" :
".cuda.fatbin_unreg") + Suffix, &M);
529 DtorFunc->setSection(
".text.startup");
531 auto *PtrTy = PointerType::getUnqual(
C);
534 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy,
false);
536 IsHIP ?
"__hipRegisterFatBinary" :
"__cudaRegisterFatBinary", RegFatTy);
541 M.getOrInsertFunction(
"__cudaRegisterFatBinaryEnd", RegFatEndTy);
546 IsHIP ?
"__hipUnregisterFatBinary" :
"__cudaUnregisterFatBinary",
556 (IsHIP ?
".hip.binary_handle" :
".cuda.binary_handle") + Suffix);
560 CallInst *Handle = CtorBuilder.CreateCall(
563 CtorBuilder.CreateAlignedStore(
564 Handle, BinaryHandleGlobal,
565 Align(
M.getDataLayout().getPointerTypeSize(PtrTy)));
566 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
568 EmitSurfacesAndTextures),
571 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
572 CtorBuilder.CreateCall(AtExit, DtorFunc);
573 CtorBuilder.CreateRetVoid();
579 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
580 PtrTy, BinaryHandleGlobal,
581 Align(
M.getDataLayout().getPointerTypeSize(PtrTy)));
582 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
583 DtorBuilder.CreateRetVoid();
594 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
597 "No binary descriptors created.");
598 createRegisterFunction(M,
Desc, Suffix);
605 bool EmitSurfacesAndTextures) {
609 "No fatbin section created.");
611 createRegisterFatbinFunction(M,
Desc,
false, EntryArray, Suffix,
612 EmitSurfacesAndTextures);
618 bool EmitSurfacesAndTextures) {
622 "No fatbin section created.");
624 createRegisterFatbinFunction(M,
Desc,
true, EntryArray, Suffix,
625 EmitSurfacesAndTextures);
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching the ArrayRef passed in.
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single entity.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
@ InternalLinkage
Rename collisions when linking (static functions).
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
A Module instance is used to store all the information related to an LLVM module.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Class to represent struct types.
static StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Triple - Helper class for working with autoconf configuration names.
bool isMacOSX() const
Is this a Mac OS X triple.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static uint64_t getAlignment()
@ C
The default llvm calling convention, compatible with C.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared between all suspend points.
StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered with the offloading runtime.
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
@ OffloadGlobalEntry
Mark the entry as a global entry.
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
@ OffloadGlobalExtern
Mark the entry as being extern.
@ OffloadGlobalConstant
Mark the entry as being constant.
llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the OpenMP Offloading runtime libomptarget.
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the HIP runtime.
llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the CUDA runtime.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
@ offload_binary
LLVM offload object file.