47 #define DEBUG_TYPE "amdgpu-lower-enqueued-block" 54 class AMDGPUOpenCLEnqueuedBlockLowering :
public ModulePass {
58 explicit AMDGPUOpenCLEnqueuedBlockLowering() :
ModulePass(ID) {}
61 bool runOnModule(
Module &M)
override;
72 "Lower OpenCL enqueued blocks",
false,
false)
75 return new AMDGPUOpenCLEnqueuedBlockLowering();
81 for (
auto U : F->
users()) {
82 if (
auto *CI = dyn_cast<CallInst>(&*U)) {
83 auto *Caller = CI->getParent()->getParent();
84 if (Callers.
insert(Caller).second)
93 if (
auto *
I = dyn_cast<Instruction>(U)) {
94 auto *
F =
I->getParent()->getParent();
99 if (!isa<Constant>(U))
101 for (
auto UU : U->
users())
105 bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(
Module &M) {
108 bool Changed =
false;
110 if (
F.hasFnAttribute(
"enqueued-block")) {
117 DEBUG(
dbgs() <<
"found enqueued kernel: " <<
F.getName() <<
'\n');
127 DEBUG(
dbgs() <<
"runtime handle created: " << *GV <<
'\n');
129 for (
auto U :
F.users()) {
131 if (!isa<ConstantExpr>(UU))
134 auto *BitCast = cast<ConstantExpr>(UU);
137 F.addFnAttr(
"runtime-handle", RuntimeHandle);
144 for (
auto F : Callers) {
147 F->addFnAttr(
"calls-enqueue-kernel");
148 DEBUG(
dbgs() <<
"mark enqueue_kernel caller:" <<
F->getName() <<
'\n');
static void collectCallers(Function *F, DenseSet< Function *> &Callers)
Collect direct or indirect callers of F and save them to Callers.
Compute iterated dominance frontiers using a linear time algorithm.
A Module instance is used to store all the information related to an LLVM module. ...
Implements a dense probed hash-table based set.
Externally visible function.
char & AMDGPUOpenCLEnqueuedBlockLoweringID
Address space for global memory (RAT0, VTX0).
constexpr char RuntimeHandle[]
Key for Kernel::Attr::Metadata::mRuntimeHandle.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Calling convention for AMDGPU code object kernels.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVMContext & getContext() const
Get the global data context.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< iterator > functions()
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
std::pair< iterator, bool > insert(const ValueT &V)
INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE, "Lower OpenCL enqueued blocks", false, false) ModulePass *llvm
static void collectFunctionUsers(User *U, DenseSet< Function *> &Funcs)
If U is instruction or constant, collect functions which directly or indirectly use it...
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Module.h This file contains the declarations for the Module class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< user_iterator > users()
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
static IntegerType * getInt8Ty(LLVMContext &C)