26#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
30static Function *createInitOrFiniKernelFunction(
Module &M,
bool IsCtor) {
31 StringRef InitOrFiniKernelName =
"amdgcn.device.init";
33 InitOrFiniKernelName =
"amdgcn.device.fini";
34 if (M.getFunction(InitOrFiniKernelName))
41 InitOrFiniKernel->
addFnAttr(
"amdgpu-flat-work-group-size",
"1,1");
43 InitOrFiniKernel->
addFnAttr(
"device-init");
45 InitOrFiniKernel->
addFnAttr(
"device-fini");
46 return InitOrFiniKernel;
72static void createInitOrFiniCalls(
Function &
F,
bool IsCtor) {
80 ArrayType *PtrArrayTy = ArrayType::get(PtrTy, 0);
82 auto *Begin = M.getOrInsertGlobal(
83 IsCtor ?
"__init_array_start" :
"__fini_array_start", PtrArrayTy, [&]() {
88 IsCtor ?
"__init_array_start" :
"__fini_array_start",
89 nullptr, GlobalVariable::NotThreadLocal,
92 auto *
End = M.getOrInsertGlobal(
93 IsCtor ?
"__init_array_end" :
"__fini_array_end", PtrArrayTy, [&]() {
98 IsCtor ?
"__init_array_end" :
"__fini_array_end",
99 nullptr, GlobalVariable::NotThreadLocal,
105 auto *CallBackTy = FunctionType::get(IRB.
getVoidTy(), {});
107 Value *Start = Begin;
112 Type *Int64Ty = IntegerType::getInt64Ty(
C);
115 auto *ByteSize = IRB.
CreateSub(EndPtr, BeginPtr,
"",
true,
117 auto *
Size = IRB.
CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3),
"",
129 IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
133 auto *CallBackPHI = IRB.
CreatePHI(PtrTy, 2,
"ptr");
134 auto *CallBack = IRB.
CreateLoad(
F.getType(), CallBackPHI,
"callback");
138 auto *EndCmp = IRB.
CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
139 NewCallBack, Stop,
"end");
140 CallBackPHI->addIncoming(Start, &
F.getEntryBlock());
141 CallBackPHI->addIncoming(NewCallBack, LoopBB);
156 Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
157 if (!InitOrFiniKernel)
160 createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
166static bool lowerCtorsAndDtors(
Module &M) {
168 Modified |= createInitOrFiniKernel(M,
"llvm.global_ctors",
true);
169 Modified |= createInitOrFiniKernel(M,
"llvm.global_dtors",
false);
173class AMDGPUCtorDtorLoweringLegacy final :
public ModulePass {
188char AMDGPUCtorDtorLoweringLegacy::ID = 0;
190 AMDGPUCtorDtorLoweringLegacy::ID;
192 "Lower ctors and dtors for AMDGPU",
false,
false)
195 return new AMDGPUCtorDtorLoweringLegacy();
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
ConstantArray - Constant Array Declarations.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Creates a function with some attributes recorded in llvm.module.flags and the LLVMContext applied.
void setCallingConv(CallingConv::ID CC)
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ ExternalLinkage
Externally visible function.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
This is an important class for using LLVM in a threaded context.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
unsigned getNumOperands() const
LLVM Value Representation.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUCtorDtorLoweringLegacyPassID
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.