23 #define DEBUG_TYPE "amdgpu-promote-alloca"
35 int LocalMemAvailable;
39 LocalMemAvailable(0) { }
40 bool doInitialization(
Module &M)
override;
42 const char *getPassName()
const override {
return "AMDGPU Promote Alloca"; }
50 bool AMDGPUPromoteAlloca::doInitialization(
Module &M) {
55 bool AMDGPUPromoteAlloca::runOnFunction(
Function &
F) {
59 LocalMemAvailable =
ST.getLocalMemorySize();
65 for (
unsigned i = 0, e = FTy->
getNumParams(); i != e; ++i) {
69 LocalMemAvailable = 0;
70 DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
71 "local memory disabled.\n");
76 if (LocalMemAvailable > 0) {
79 E = Mod->global_end();
I != E; ++
I) {
85 UE = GV->
use_end(); U != UE; ++U) {
96 LocalMemAvailable = std::max(0, LocalMemAvailable);
97 DEBUG(
dbgs() << LocalMemAvailable <<
"bytes free in local memory.\n");
111 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
112 if (isa<AllocaInst>(Ptr))
117 auto I = GEPIdx.find(GEP);
118 return I == GEPIdx.end() ?
nullptr : I->second;
141 case Instruction::BitCast:
142 case Instruction::AddrSpaceCast:
152 DEBUG(
dbgs() <<
"Alloca Candidate for vectorization \n");
160 DEBUG(
dbgs() <<
" Cannot convert type to vector");
164 std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
165 std::vector<Value*> WorkList;
166 for (
User *AllocaUser : Alloca->
users()) {
172 WorkList.push_back(AllocaUser);
181 DEBUG(
dbgs() <<
" Cannot compute vector index for GEP " << *GEP <<
'\n');
185 GEPVectorIdx[
GEP] = Index;
186 for (
User *GEPUser : AllocaUser->
users()) {
190 WorkList.push_back(GEPUser);
196 DEBUG(
dbgs() <<
" Converting alloca to vector "
197 << *AllocaTy <<
" -> " << *VectorTy <<
'\n');
199 for (std::vector<Value*>::iterator
I = WorkList.begin(),
200 E = WorkList.end();
I != E; ++
I) {
226 case Instruction::BitCast:
227 case Instruction::AddrSpaceCast:
241 if(std::find(WorkList.begin(), WorkList.end(),
User) != WorkList.end())
243 if (isa<CallInst>(
User)) {
244 WorkList.push_back(
User);
250 if (UseInst && UseInst->
getOpcode() == Instruction::PtrToInt)
256 WorkList.push_back(
User);
263 void AMDGPUPromoteAlloca::visitAlloca(
AllocaInst &
I) {
269 DEBUG(
dbgs() <<
"Trying to promote " << I <<
'\n');
274 DEBUG(
dbgs() <<
" alloca is not a candidate for vectorization.\n");
279 unsigned WorkGroupSize = 256;
281 WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
283 if (AllocaSize > LocalMemAvailable) {
284 DEBUG(
dbgs() <<
" Not enough local memory to promote alloca.\n");
288 std::vector<Value*> WorkList;
291 DEBUG(
dbgs() <<
" Do not know how to convert all uses\n");
295 DEBUG(
dbgs() <<
"Promoting alloca to local memory\n");
296 LocalMemAvailable -= AllocaSize;
308 Value *ReadLocalSizeY = Mod->getOrInsertFunction(
309 "llvm.r600.read.local.size.y", FTy, AttrSet);
310 Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
311 "llvm.r600.read.local.size.z", FTy, AttrSet);
312 Value *ReadTIDIGX = Mod->getOrInsertFunction(
313 "llvm.r600.read.tidig.x", FTy, AttrSet);
314 Value *ReadTIDIGY = Mod->getOrInsertFunction(
315 "llvm.r600.read.tidig.y", FTy, AttrSet);
316 Value *ReadTIDIGZ = Mod->getOrInsertFunction(
317 "llvm.r600.read.tidig.z", FTy, AttrSet);
319 Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
320 Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
321 Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
322 Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
323 Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
325 Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
326 Tmp0 = Builder.CreateMul(Tmp0, TIdX);
327 Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
328 Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
329 TID = Builder.CreateAdd(TID, TIdZ);
331 std::vector<Value*> Indices;
333 Indices.push_back(TID);
335 Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
340 for (std::vector<Value*>::iterator i = WorkList.begin(),
341 e = WorkList.end(); i != e; ++i) {
349 if (isa<AddrSpaceCastInst>(V))
360 std::vector<Type*> ArgTypes;
362 ArgIdx != ArgEnd; ++ArgIdx) {
375 Builder.SetInsertPoint(Intr);
377 case Intrinsic::lifetime_start:
378 case Intrinsic::lifetime_end:
382 case Intrinsic::memcpy: {
390 case Intrinsic::memset: {
406 return new AMDGPUPromoteAlloca(ST);
unsigned getAlignment() const
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
use_iterator_impl< Use > use_iterator
LoadInst * CreateLoad(Value *Ptr, const char *Name)
Base class for instruction visitors.
AMDGPU specific subclass of TargetSubtarget.
A Module instance is used to store all the information related to an LLVM module. ...
unsigned getNumParams() const
getNumParams - Return the number of fixed parameters this function type requires. ...
Intrinsic::ID getIntrinsicID() const
getIntrinsicID - Return the intrinsic ID of this intrinsic.
Value * getValue() const
get* - Return the arguments to the instruction.
unsigned getNumOperands() const
CallInst - This class represents a function call, abstracting a target machine's calling convention...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Externally visible function.
MemSetInst - This class wraps the llvm.memset intrinsic.
const Function * getParent() const
Return the enclosing method, or null if none.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Address space for local memory.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getPointerElementType() const
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
static Constant * getNullValue(Type *Ty)
StringRef getName() const
Return a constant reference to the value's name.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getArrayElementType() const
A Use represents the edge between a Value definition and its users.
unsigned getNumArgOperands() const
getNumArgOperands - Return the number of call arguments.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
Function does not access memory.
FunctionType - Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
FunctionType::get - This static method is the primary way of constructing a FunctionType.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
bool isArrayTy() const
isArrayTy - True if this is an instance of ArrayType.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Type * getElementType() const
PointerType - Class to represent pointers.
GetElementPtrInst - an instruction for type-safe pointer arithmetic to access elements of arrays and ...
Type * getParamType(unsigned i) const
Parameter type accessors.
The instances of the Type class are immutable: once they are created, they are never changed...
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
This is an important base class in LLVM.
Value * getRawDest() const
FunctionPass class - This class is used to implement most global optimizations.
Value * getOperand(unsigned i) const
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
LLVMContext & getContext() const
All values hold a context through their type.
PointerType * getPointerTo(unsigned AddrSpace=0)
getPointerTo - Return a pointer to the current type.
void dump() const
Support for debugging, callable in GDB: V->dump()
This is the shared class of boolean and integer constants.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Type * getType() const
All values are typed, get the type of this value.
Value * getLength() const
MemCpyInst - This class wraps the llvm.memcpy intrinsic.
Function * getCalledFunction() const
getCalledFunction - Return the function called, or null if this is an indirect function invocation...
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
AttributeSet getAttributes() const
Return the attribute list for this Function.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
VectorType - Class to represent vector types.
iterator_range< user_iterator > users()
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void setCalledFunction(Value *Fn)
setCalledFunction - Set the function called.
PointerType * getType() const
Global values are always pointers.
AttributeSet addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const
Add an attribute to the attribute set at the given index.
static IntegerType * getInt32Ty(LLVMContext &C)
FunctionType * getFunctionType() const
static ArrayType * get(Type *ElementType, uint64_t NumElements)
ArrayType::get - This static method is the primary way to construct an ArrayType. ...
uint64_t getArrayNumElements() const
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Value * getRawSource() const
get* - Return the arguments to the instruction.
LLVM Value Representation.
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct a VectorType.
C - The default llvm calling convention, compatible with C.
Type * getAllocatedType() const
getAllocatedType - Return the type that is being allocated by the instruction.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
const BasicBlock * getParent() const
FunctionPass * createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST)
IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic functions.
AllocaInst - an instruction to allocate memory on the stack.