26#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
30static Function *createInitOrFiniKernelFunction(
Module &M,
bool IsCtor) {
31 StringRef InitOrFiniKernelName =
"amdgcn.device.init";
33 InitOrFiniKernelName =
"amdgcn.device.fini";
34 if (M.getFunction(InitOrFiniKernelName))
41 InitOrFiniKernel->
addFnAttr(
"amdgpu-flat-work-group-size",
"1,1");
43 InitOrFiniKernel->
addFnAttr(
"device-init");
45 InitOrFiniKernel->
addFnAttr(
"device-fini");
46 return InitOrFiniKernel;
72static void createInitOrFiniCalls(
Function &
F,
bool IsCtor) {
81 auto *Begin = M.getOrInsertGlobal(
82 IsCtor ?
"__init_array_start" :
"__fini_array_start",
83 ArrayType::get(PtrTy, 0), [&]() {
85 M, ArrayType::get(PtrTy, 0),
88 IsCtor ?
"__init_array_start" :
"__fini_array_start",
89 nullptr, GlobalVariable::NotThreadLocal,
92 auto *
End = M.getOrInsertGlobal(
93 IsCtor ?
"__init_array_end" :
"__fini_array_end",
94 ArrayType::get(PtrTy, 0), [&]() {
96 M, ArrayType::get(PtrTy, 0),
99 IsCtor ?
"__init_array_end" :
"__fini_array_end",
100 nullptr, GlobalVariable::NotThreadLocal,
106 auto *CallBackTy = FunctionType::get(IRB.
getVoidTy(), {});
108 Value *Start = Begin;
113 Type *Int64Ty = IntegerType::getInt64Ty(
C);
116 auto *ByteSize = IRB.
CreateSub(EndPtr, BeginPtr);
117 auto *
Size = IRB.
CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
120 ArrayType::get(IRB.
getPtrTy(), 0), Begin,
126 IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
130 auto *CallBackPHI = IRB.
CreatePHI(PtrTy, 2,
"ptr");
132 CallBackPHI,
"callback");
136 auto *EndCmp = IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
137 NewCallBack, Stop,
"end");
138 CallBackPHI->addIncoming(Start, &
F.getEntryBlock());
139 CallBackPHI->addIncoming(NewCallBack, LoopBB);
154 Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
155 if (!InitOrFiniKernel)
158 createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
164static bool lowerCtorsAndDtors(
Module &M) {
166 Modified |= createInitOrFiniKernel(M,
"llvm.global_ctors",
true);
167 Modified |= createInitOrFiniKernel(M,
"llvm.global_dtors",
false);
171class AMDGPUCtorDtorLoweringLegacy final :
public ModulePass {
186char AMDGPUCtorDtorLoweringLegacy::ID = 0;
188 AMDGPUCtorDtorLoweringLegacy::ID;
190 "Lower ctors and dtors for AMDGPU",
false,
false)
193 return new AMDGPUCtorDtorLoweringLegacy();
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
ConstantArray - Constant Array Declarations.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Creates a function with some attributes recorded in llvm.module.flags and the LLVMContext applied.
void setCallingConv(CallingConv::ID CC)
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ ExternalLinkage
Externally visible function.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
This is an important class for using LLVM in a threaded context.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
unsigned getNumOperands() const
LLVM Value Representation.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUCtorDtorLoweringLegacyPassID
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.