26#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
32class AMDGPUPromoteKernelArguments :
public FunctionPass {
65void AMDGPUPromoteKernelArguments::enqueueUsers(
Value *
Ptr) {
68 while (!PtrUsers.empty()) {
69 Instruction *
U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
73 switch (
U->getOpcode()) {
76 case Instruction::Load: {
78 if (
LD->getPointerOperand()->stripInBoundsOffsets() ==
Ptr &&
84 case Instruction::GetElementPtr:
85 case Instruction::AddrSpaceCast:
86 case Instruction::BitCast:
87 if (
U->getOperand(0)->stripInBoundsOffsets() ==
Ptr)
88 PtrUsers.append(
U->user_begin(),
U->user_end());
94bool AMDGPUPromoteKernelArguments::promotePointer(
Value *
Ptr) {
99 Changed |= promoteLoad(LI);
121 B.CreateAddrSpaceCast(
Ptr, NewPT,
Twine(
Ptr->getName(),
".global"));
123 B.CreateAddrSpaceCast(Cast, PT,
Twine(
Ptr->getName(),
".flat"));
124 Ptr->replaceUsesWithIf(CastBack,
125 [Cast](
Use &U) {
return U.getUser() != Cast; });
130bool AMDGPUPromoteKernelArguments::promoteLoad(
LoadInst *LI) {
142 AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
170 PointerType *PT = dyn_cast<PointerType>(Arg.getType());
176 Ptrs.push_back(&Arg);
179 bool Changed =
false;
180 while (!Ptrs.empty()) {
182 Changed |= promotePointer(
Ptr);
188bool AMDGPUPromoteKernelArguments::runOnFunction(
Function &
F) {
189 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191 return run(
F, MSSA, AA);
195 "AMDGPU Promote Kernel Arguments",
false,
false)
201char AMDGPUPromoteKernelArguments::
ID = 0;
204 return new AMDGPUPromoteKernelArguments();
212 if (AMDGPUPromoteKernelArguments().run(
F, MSSA, AA)) {
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an incoming formal argument to a Function.
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
An instruction for reading from memory.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check if a Load is clobbered in its function.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()