LLVM  3.7.0
AMDGPUPromoteAlloca.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass eliminates allocas by either converting them into vectors or
11 // by migrating them to local address space.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstVisitor.h"
20 #include "llvm/Support/Debug.h"
22 
23 #define DEBUG_TYPE "amdgpu-promote-alloca"
24 
25 using namespace llvm;
26 
27 namespace {
28 
29 class AMDGPUPromoteAlloca : public FunctionPass,
30  public InstVisitor<AMDGPUPromoteAlloca> {
31 
32  static char ID;
33  Module *Mod;
34  const AMDGPUSubtarget &ST;
35  int LocalMemAvailable;
36 
37 public:
38  AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
39  LocalMemAvailable(0) { }
40  bool doInitialization(Module &M) override;
41  bool runOnFunction(Function &F) override;
42  const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
43  void visitAlloca(AllocaInst &I);
44 };
45 
46 } // End anonymous namespace
47 
49 
50 bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
51  Mod = &M;
52  return false;
53 }
54 
55 bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
56 
57  const FunctionType *FTy = F.getFunctionType();
58 
59  LocalMemAvailable = ST.getLocalMemorySize();
60 
61 
62  // If the function has any arguments in the local address space, then it's
63  // possible these arguments require the entire local memory space, so
64  // we cannot use local memory in the pass.
65  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
66  const Type *ParamTy = FTy->getParamType(i);
67  if (ParamTy->isPointerTy() &&
69  LocalMemAvailable = 0;
70  DEBUG(dbgs() << "Function has local memory argument. Promoting to "
71  "local memory disabled.\n");
72  break;
73  }
74  }
75 
76  if (LocalMemAvailable > 0) {
77  // Check how much local memory is being used by global objects
78  for (Module::global_iterator I = Mod->global_begin(),
79  E = Mod->global_end(); I != E; ++I) {
80  GlobalVariable *GV = I;
81  PointerType *GVTy = GV->getType();
83  continue;
84  for (Value::use_iterator U = GV->use_begin(),
85  UE = GV->use_end(); U != UE; ++U) {
87  if (!Use)
88  continue;
89  if (Use->getParent()->getParent() == &F)
90  LocalMemAvailable -=
91  Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
92  }
93  }
94  }
95 
96  LocalMemAvailable = std::max(0, LocalMemAvailable);
97  DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n");
98 
99  visit(F);
100 
101  return false;
102 }
103 
104 static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
105  return VectorType::get(ArrayTy->getArrayElementType(),
106  ArrayTy->getArrayNumElements());
107 }
108 
109 static Value *
111  const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
112  if (isa<AllocaInst>(Ptr))
114 
115  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
116 
117  auto I = GEPIdx.find(GEP);
118  return I == GEPIdx.end() ? nullptr : I->second;
119 }
120 
122  // FIXME we only support simple cases
123  if (GEP->getNumOperands() != 3)
124  return NULL;
125 
126  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
127  if (!I0 || !I0->isZero())
128  return NULL;
129 
130  return GEP->getOperand(2);
131 }
132 
133 // Not an instruction handled below to turn into a vector.
134 //
135 // TODO: Check isTriviallyVectorizable for calls and handle other
136 // instructions.
137 static bool canVectorizeInst(Instruction *Inst) {
138  switch (Inst->getOpcode()) {
139  case Instruction::Load:
140  case Instruction::Store:
141  case Instruction::BitCast:
142  case Instruction::AddrSpaceCast:
143  return true;
144  default:
145  return false;
146  }
147 }
148 
149 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
150  Type *AllocaTy = Alloca->getAllocatedType();
151 
152  DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
153 
154  // FIXME: There is no reason why we can't support larger arrays, we
155  // are just being conservative for now.
156  if (!AllocaTy->isArrayTy() ||
157  AllocaTy->getArrayElementType()->isVectorTy() ||
158  AllocaTy->getArrayNumElements() > 4) {
159 
160  DEBUG(dbgs() << " Cannot convert type to vector");
161  return false;
162  }
163 
164  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
165  std::vector<Value*> WorkList;
166  for (User *AllocaUser : Alloca->users()) {
168  if (!GEP) {
169  if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
170  return false;
171 
172  WorkList.push_back(AllocaUser);
173  continue;
174  }
175 
176  Value *Index = GEPToVectorIndex(GEP);
177 
178  // If we can't compute a vector index from this GEP, then we can't
179  // promote this alloca to vector.
180  if (!Index) {
181  DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
182  return false;
183  }
184 
185  GEPVectorIdx[GEP] = Index;
186  for (User *GEPUser : AllocaUser->users()) {
187  if (!canVectorizeInst(cast<Instruction>(GEPUser)))
188  return false;
189 
190  WorkList.push_back(GEPUser);
191  }
192  }
193 
194  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
195 
196  DEBUG(dbgs() << " Converting alloca to vector "
197  << *AllocaTy << " -> " << *VectorTy << '\n');
198 
199  for (std::vector<Value*>::iterator I = WorkList.begin(),
200  E = WorkList.end(); I != E; ++I) {
201  Instruction *Inst = cast<Instruction>(*I);
202  IRBuilder<> Builder(Inst);
203  switch (Inst->getOpcode()) {
204  case Instruction::Load: {
205  Value *Ptr = Inst->getOperand(0);
206  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
207  Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
208  Value *VecValue = Builder.CreateLoad(BitCast);
209  Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
210  Inst->replaceAllUsesWith(ExtractElement);
211  Inst->eraseFromParent();
212  break;
213  }
214  case Instruction::Store: {
215  Value *Ptr = Inst->getOperand(1);
216  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
217  Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
218  Value *VecValue = Builder.CreateLoad(BitCast);
219  Value *NewVecValue = Builder.CreateInsertElement(VecValue,
220  Inst->getOperand(0),
221  Index);
222  Builder.CreateStore(NewVecValue, BitCast);
223  Inst->eraseFromParent();
224  break;
225  }
226  case Instruction::BitCast:
227  case Instruction::AddrSpaceCast:
228  break;
229 
230  default:
231  Inst->dump();
232  llvm_unreachable("Inconsistency in instructions promotable to vector");
233  }
234  }
235  return true;
236 }
237 
238 static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
239  bool Success = true;
240  for (User *User : Val->users()) {
241  if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
242  continue;
243  if (isa<CallInst>(User)) {
244  WorkList.push_back(User);
245  continue;
246  }
247 
248  // FIXME: Correctly handle ptrtoint instructions.
249  Instruction *UseInst = dyn_cast<Instruction>(User);
250  if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
251  return false;
252 
253  if (!User->getType()->isPointerTy())
254  continue;
255 
256  WorkList.push_back(User);
257 
258  Success &= collectUsesWithPtrTypes(User, WorkList);
259  }
260  return Success;
261 }
262 
263 void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
264  IRBuilder<> Builder(&I);
265 
266  // First try to replace the alloca with a vector
267  Type *AllocaTy = I.getAllocatedType();
268 
269  DEBUG(dbgs() << "Trying to promote " << I << '\n');
270 
271  if (tryPromoteAllocaToVector(&I))
272  return;
273 
274  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
275 
276  // FIXME: This is the maximum work group size. We should try to get
277  // value from the reqd_work_group_size function attribute if it is
278  // available.
279  unsigned WorkGroupSize = 256;
280  int AllocaSize =
281  WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
282 
283  if (AllocaSize > LocalMemAvailable) {
284  DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
285  return;
286  }
287 
288  std::vector<Value*> WorkList;
289 
290  if (!collectUsesWithPtrTypes(&I, WorkList)) {
291  DEBUG(dbgs() << " Do not know how to convert all uses\n");
292  return;
293  }
294 
295  DEBUG(dbgs() << "Promoting alloca to local memory\n");
296  LocalMemAvailable -= AllocaSize;
297 
298  Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
299  GlobalVariable *GV = new GlobalVariable(
300  *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
302 
304  Type::getInt32Ty(Mod->getContext()), false);
305  AttributeSet AttrSet;
306  AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
307 
308  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
309  "llvm.r600.read.local.size.y", FTy, AttrSet);
310  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
311  "llvm.r600.read.local.size.z", FTy, AttrSet);
312  Value *ReadTIDIGX = Mod->getOrInsertFunction(
313  "llvm.r600.read.tidig.x", FTy, AttrSet);
314  Value *ReadTIDIGY = Mod->getOrInsertFunction(
315  "llvm.r600.read.tidig.y", FTy, AttrSet);
316  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
317  "llvm.r600.read.tidig.z", FTy, AttrSet);
318 
319  Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
320  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
321  Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
322  Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
323  Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
324 
325  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
326  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
327  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
328  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
329  TID = Builder.CreateAdd(TID, TIdZ);
330 
331  std::vector<Value*> Indices;
332  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
333  Indices.push_back(TID);
334 
335  Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
336  I.mutateType(Offset->getType());
337  I.replaceAllUsesWith(Offset);
338  I.eraseFromParent();
339 
340  for (std::vector<Value*>::iterator i = WorkList.begin(),
341  e = WorkList.end(); i != e; ++i) {
342  Value *V = *i;
344  if (!Call) {
345  Type *EltTy = V->getType()->getPointerElementType();
347 
348  // The operand's value should be corrected on its own.
349  if (isa<AddrSpaceCastInst>(V))
350  continue;
351 
352  // FIXME: It doesn't really make sense to try to do this for all
353  // instructions.
354  V->mutateType(NewTy);
355  continue;
356  }
357 
359  if (!Intr) {
360  std::vector<Type*> ArgTypes;
361  for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
362  ArgIdx != ArgEnd; ++ArgIdx) {
363  ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
364  }
365  Function *F = Call->getCalledFunction();
366  FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
367  F->isVarArg());
368  Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
369  NewType, F->getAttributes());
370  Function *NewF = cast<Function>(C);
371  Call->setCalledFunction(NewF);
372  continue;
373  }
374 
375  Builder.SetInsertPoint(Intr);
376  switch (Intr->getIntrinsicID()) {
377  case Intrinsic::lifetime_start:
378  case Intrinsic::lifetime_end:
379  // These intrinsics are for address space 0 only
380  Intr->eraseFromParent();
381  continue;
382  case Intrinsic::memcpy: {
383  MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
384  Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
385  MemCpy->getLength(), MemCpy->getAlignment(),
386  MemCpy->isVolatile());
387  Intr->eraseFromParent();
388  continue;
389  }
390  case Intrinsic::memset: {
391  MemSetInst *MemSet = cast<MemSetInst>(Intr);
392  Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
393  MemSet->getLength(), MemSet->getAlignment(),
394  MemSet->isVolatile());
395  Intr->eraseFromParent();
396  continue;
397  }
398  default:
399  Intr->dump();
400  llvm_unreachable("Don't know how to promote alloca intrinsic use.");
401  }
402  }
403 }
404 
406  return new AMDGPUPromoteAlloca(ST);
407 }
unsigned getAlignment() const
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
use_iterator use_end()
Definition: Value.h:281
use_iterator_impl< Use > use_iterator
Definition: Value.h:277
LoadInst * CreateLoad(Value *Ptr, const char *Name)
Definition: IRBuilder.h:973
Base class for instruction visitors.
Definition: InstVisitor.h:81
AMDGPU specific subclass of TargetSubtarget.
bool isVolatile() const
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
unsigned getNumParams() const
getNumParams - Return the number of fixed parameters this function type requires. ...
Definition: DerivedTypes.h:136
Intrinsic::ID getIntrinsicID() const
getIntrinsicID - Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:44
Value * getValue() const
get* - Return the arguments to the instruction.
unsigned getNumOperands() const
Definition: User.h:138
CallInst - This class represents a function call, abstracting a target machine's calling convention...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
Externally visible function.
Definition: GlobalValue.h:40
MemSetInst - This class wraps the llvm.memset intrinsic.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
F(f)
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:472
Address space for local memory.
Definition: AMDGPU.h:112
Hexagon Common GEP
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: Type.cpp:216
Type * getPointerElementType() const
Definition: Type.h:366
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1508
static Constant * getNullValue(Type *Ty)
Definition: Constants.cpp:178
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
static Value * calculateVectorIndex(Value *Ptr, const std::map< GetElementPtrInst *, Value * > &GEPIdx)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
Type * getArrayElementType() const
Definition: Type.h:361
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
unsigned getNumArgOperands() const
getNumArgOperands - Return the number of call arguments.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
Function does not access memory.
Definition: Attributes.h:99
static bool tryPromoteAllocaToVector(AllocaInst *Alloca)
FunctionType - Class to represent function types.
Definition: DerivedTypes.h:96
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
FunctionType::get - This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
bool isArrayTy() const
isArrayTy - True if this is an instance of ArrayType.
Definition: Type.h:213
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:985
Type * getElementType() const
Definition: DerivedTypes.h:323
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
GetElementPtrInst - an instruction for type-safe pointer arithmetic to access elements of arrays and ...
Definition: Instructions.h:830
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:131
static Value * GEPToVectorIndex(GetElementPtrInst *GEP)
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
Definition: Type.h:226
This is an important base class in LLVM.
Definition: Constant.h:41
Value * getRawDest() const
static VectorType * arrayTypeToVecType(const Type *ArrayTy)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
Value * getOperand(unsigned i) const
Definition: User.h:118
static bool collectUsesWithPtrTypes(Value *Val, std::vector< Value * > &WorkList)
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
Definition: Type.h:217
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:519
PointerType * getPointerTo(unsigned AddrSpace=0)
getPointerTo - Return a pointer to the current type.
Definition: Type.cpp:764
static bool canVectorizeInst(Instruction *Inst)
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3353
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1495
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1253
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Value * getLength() const
MemCpyInst - This class wraps the llvm.memcpy intrinsic.
Function * getCalledFunction() const
getCalledFunction - Return the function called, or null if this is an indirect function invocation...
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:161
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
AttributeSet getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:181
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
iterator_range< user_iterator > users()
Definition: Value.h:300
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
#define Success
use_iterator use_begin()
Definition: Value.h:279
void setCalledFunction(Value *Fn)
setCalledFunction - Set the function called.
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:185
AttributeSet addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const
Add an attribute to the attribute set at the given index.
Definition: Attributes.cpp:753
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
#define I(x, y, z)
Definition: MD5.cpp:54
FunctionType * getFunctionType() const
Definition: Function.cpp:227
static ArrayType * get(Type *ElementType, uint64_t NumElements)
ArrayType::get - This static method is the primary way to construct an ArrayType. ...
Definition: Type.cpp:686
uint64_t getArrayNumElements() const
Definition: Type.cpp:208
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:471
Value * getRawSource() const
get* - Return the arguments to the instruction.
LLVM Value Representation.
Definition: Value.h:69
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:112
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct an VectorType.
Definition: Type.cpp:713
#define DEBUG(X)
Definition: Debug.h:92
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Type * getAllocatedType() const
getAllocatedType - Return the type that is being allocated by the instruction.
Definition: Instructions.h:122
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.cpp:229
const BasicBlock * getParent() const
Definition: Instruction.h:72
FunctionPass * createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST)
IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:37
AllocaInst - an instruction to allocate memory on the stack.
Definition: Instructions.h:76