26#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
30static Function *createInitOrFiniKernelFunction(
Module &M,
bool IsCtor) {
31 StringRef InitOrFiniKernelName =
"amdgcn.device.init";
33 InitOrFiniKernelName =
"amdgcn.device.fini";
34 if (M.getFunction(InitOrFiniKernelName))
41 InitOrFiniKernel->
addFnAttr(
"amdgpu-flat-work-group-size",
"1,1");
43 InitOrFiniKernel->
addFnAttr(
"device-init");
45 InitOrFiniKernel->
addFnAttr(
"device-fini");
46 return InitOrFiniKernel;
72static void createInitOrFiniCalls(
Function &
F,
bool IsCtor) {
81 auto *Begin = M.getOrInsertGlobal(
82 IsCtor ?
"__init_array_start" :
"__fini_array_start",
83 ArrayType::get(PtrTy, 0), [&]() {
85 M, ArrayType::get(PtrTy, 0),
88 IsCtor ?
"__init_array_start" :
"__fini_array_start",
89 nullptr, GlobalVariable::NotThreadLocal,
92 auto *
End = M.getOrInsertGlobal(
93 IsCtor ?
"__init_array_end" :
"__fini_array_end",
94 ArrayType::get(PtrTy, 0), [&]() {
96 M, ArrayType::get(PtrTy, 0),
99 IsCtor ?
"__init_array_end" :
"__fini_array_end",
100 nullptr, GlobalVariable::NotThreadLocal,
106 auto *CallBackTy = FunctionType::get(IRB.
getVoidTy(), {});
108 Value *Start = Begin;
113 Type *Int64Ty = IntegerType::getInt64Ty(
C);
116 auto *ByteSize = IRB.
CreateSub(EndPtr, BeginPtr);
117 auto *
Size = IRB.
CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
120 ArrayType::get(IRB.
getPtrTy(), 0), Begin,
126 IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
130 auto *CallBackPHI = IRB.
CreatePHI(PtrTy, 2,
"ptr");
132 CallBackPHI,
"callback");
136 auto *EndCmp = IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
137 NewCallBack, Stop,
"end");
138 CallBackPHI->addIncoming(Start, &
F.getEntryBlock());
139 CallBackPHI->addIncoming(NewCallBack, LoopBB);
154 Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
155 if (!InitOrFiniKernel)
158 createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
164static bool lowerCtorsAndDtors(
Module &M) {
166 Modified |= createInitOrFiniKernel(M,
"llvm.global_ctors",
true);
167 Modified |= createInitOrFiniKernel(M,
"llvm.global_dtors",
false);
171class AMDGPUCtorDtorLoweringLegacy final :
public ModulePass {
186char AMDGPUCtorDtorLoweringLegacy::ID = 0;
188 AMDGPUCtorDtorLoweringLegacy::ID;
190 "Lower ctors and dtors for AMDGPU",
false,
false)
193 return new AMDGPUCtorDtorLoweringLegacy();
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
ConstantArray - Constant Array Declarations.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Creates a function with some attributes recorded in llvm.module.flags and the LLVMContext applied.
void setCallingConv(CallingConv::ID CC)
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ ExternalLinkage
Externally visible function.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This is an important class for using LLVM in a threaded context.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
unsigned getNumOperands() const
LLVM Value Representation.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUCtorDtorLoweringLegacyPassID
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.